📄 regex.cc
字号:
{ BUFPUSH (start_memory); BUFPUSH (regnum); } *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0; *stackp++ = regnum++; *stackp++ = begalt - bufp->buffer; fixup_jump = 0; laststart = 0; begalt = b; break; case ')': if (obscure_syntax & RE_NO_BK_PARENS) goto normal_backsl; handle_close: if (stackp == stackb) goto unmatched_close; begalt = *--stackp + bufp->buffer; if (fixup_jump) store_jump (fixup_jump, jump, b); if (stackp[-1] < RE_NREGS) { BUFPUSH (stop_memory); BUFPUSH (stackp[-1]); } stackp -= 2; fixup_jump = *stackp ? *stackp + bufp->buffer - 1 : 0; laststart = *--stackp + bufp->buffer; break; case '|': if ((obscure_syntax & RE_LIMITED_OPS) || (obscure_syntax & RE_NO_BK_VBAR)) goto normal_backsl; handle_bar: if (obscure_syntax & RE_LIMITED_OPS) goto normal_char; /* Insert before the previous alternative a jump which jumps to this alternative if the former fails. */ GET_BUFFER_SPACE (6); insert_jump (on_failure_jump, begalt, b + 6, b); pending_exact = 0; b += 3; /* The alternative before the previous alternative has a jump after it which gets executed if it gets matched. Adjust that jump so it will jump to the previous alternative's analogous jump (put in below, which in turn will jump to the next (if any) alternative's such jump, etc.). The last such jump jumps to the correct final destination. */ if (fixup_jump) store_jump (fixup_jump, jump, b); /* Leave space for a jump after previous alternative---to be filled in later. */ fixup_jump = b; b += 3; laststart = 0; begalt = b; break; case '{': if (! (obscure_syntax & RE_INTERVALS) /* Let \{ be a literal. */ || ((obscure_syntax & RE_INTERVALS) && (obscure_syntax & RE_NO_BK_CURLY_BRACES)) /* If it's the string "\{". */ || (p - 2 == pattern && p == pend)) goto normal_backsl; handle_interval: beg_interval = p - 1; /* The {. */ /* If there is no previous pattern, this isn't an interval. 
*/ if (!laststart) { if (obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) goto invalid_pattern; else goto normal_backsl; } /* It also isn't an interval if not preceded by an re matching a single character or subexpression, or if the current type of intervals can't handle back references and the previous thing is a back reference. */ if (! (*laststart == anychar || *laststart == charset || *laststart == charset_not || *laststart == start_memory || (*laststart == exactn && laststart[1] == 1) || (! (obscure_syntax & RE_NO_BK_REFS) && *laststart == duplicate))) { if (obscure_syntax & RE_NO_BK_CURLY_BRACES) goto normal_char; /* Posix extended syntax is handled in previous statement; this is for Posix basic syntax. */ if (obscure_syntax & RE_INTERVALS) goto invalid_pattern; goto normal_backsl; } lower_bound = -1; /* So can see if are set. */ upper_bound = -1; GET_UNSIGNED_NUMBER (lower_bound); if (c == ',') { GET_UNSIGNED_NUMBER (upper_bound); if (upper_bound < 0) upper_bound = RE_DUP_MAX; } if (upper_bound < 0) upper_bound = lower_bound; if (! (obscure_syntax & RE_NO_BK_CURLY_BRACES)) { if (c != '\\') goto invalid_pattern; PATFETCH (c); } if (c != '}' || lower_bound < 0 || upper_bound > RE_DUP_MAX || lower_bound > upper_bound || ((obscure_syntax & RE_NO_BK_CURLY_BRACES) && p != pend && *p == '{')) { if (obscure_syntax & RE_NO_BK_CURLY_BRACES) goto unfetch_interval; else goto invalid_pattern; } /* If upper_bound is zero, don't want to succeed at all; jump from laststart to b + 3, which will be the end of the buffer after this jump is inserted. */ if (upper_bound == 0) { GET_BUFFER_SPACE (3); insert_jump (jump, laststart, b + 3, b); b += 3; } /* Otherwise, after lower_bound number of succeeds, jump to after the jump_n which will be inserted at the end of the buffer, and insert that jump_n. */ else { /* Set to 5 if only one repetition is allowed and hence no jump_n is inserted at the current end of the buffer; then only space for the succeed_n is needed. 
Otherwise, need space for both the succeed_n and the jump_n. */ unsigned slots_needed = upper_bound == 1 ? 5 : 10; GET_BUFFER_SPACE ((int) slots_needed); /* Initialize the succeed_n to n, even though it will be set by its attendant set_number_at, because re_compile_fastmap will need to know it. Jump to what the end of buffer will be after inserting this succeed_n and possibly appending a jump_n. */ insert_jump_n (succeed_n, laststart, b + slots_needed, b, lower_bound); b += 5; /* Just increment for the succeed_n here. */ /* More than one repetition is allowed, so put in at the end of the buffer a backward jump from b to the succeed_n we put in above. By the time we've gotten to this jump when matching, we'll have matched once already, so jump back only upper_bound - 1 times. */ if (upper_bound > 1) { store_jump_n (b, jump_n, laststart, upper_bound - 1); b += 5; /* When hit this when matching, reset the preceding jump_n's n to upper_bound - 1. */ BUFPUSH (set_number_at); GET_BUFFER_SPACE (2); STORE_NUMBER_AND_INCR (b, -5); STORE_NUMBER_AND_INCR (b, upper_bound - 1); } /* When hit this when matching, set the succeed_n's n. */ GET_BUFFER_SPACE (5); insert_op_2 (set_number_at, laststart, b, 5, lower_bound); b += 5; } pending_exact = 0; beg_interval = 0; break; unfetch_interval: /* If an invalid interval, match the characters as literals. */ if (beg_interval) p = beg_interval; else { fprintf (stderr, "regex: no interval beginning to which to backtrack.\n"); exit (1); } beg_interval = 0; PATFETCH (c); /* normal_char expects char in `c'. 
*/ goto normal_char; break;#ifdef emacs case '=': BUFPUSH (at_dot); break; case 's': laststart = b; BUFPUSH (syntaxspec); PATFETCH (c); BUFPUSH (syntax_spec_code[c]); break; case 'S': laststart = b; BUFPUSH (notsyntaxspec); PATFETCH (c); BUFPUSH (syntax_spec_code[c]); break;#endif /* emacs */ case 'w': laststart = b; BUFPUSH (wordchar); break; case 'W': laststart = b; BUFPUSH (notwordchar); break; case '<': BUFPUSH (wordbeg); break; case '>': BUFPUSH (wordend); break; case 'b': BUFPUSH (wordbound); break; case 'B': BUFPUSH (notwordbound); break; case '`': BUFPUSH (begbuf); break; case '\'': BUFPUSH (endbuf); break; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (obscure_syntax & RE_NO_BK_REFS) goto normal_char; c1 = c - '0'; if (c1 >= regnum) { if (obscure_syntax & RE_NO_EMPTY_BK_REF) goto invalid_pattern; else goto normal_char; } /* Can't back reference to a subexpression if inside of it. */ for (stackt = stackp - 2; stackt > stackb; stackt -= 4) if (*stackt == c1) goto normal_char; laststart = b; BUFPUSH (duplicate); BUFPUSH (c1); break; case '+': case '?': if (obscure_syntax & RE_BK_PLUS_QM) goto handle_plus; else goto normal_backsl; break; default: normal_backsl: /* You might think it would be useful for \ to mean not to translate; but if we don't translate it it will never match anything. */ if (translate) c = translate[c]; goto normal_char; } break; default: normal_char: /* Expects the character in `c'. */ if (!pending_exact || pending_exact + *pending_exact + 1 != b || *pending_exact == 0177 || *p == '*' || *p == '^' || ((obscure_syntax & RE_BK_PLUS_QM) ? *p == '\\' && (p[1] == '+' || p[1] == '?') : (*p == '+' || *p == '?')) || ((obscure_syntax & RE_INTERVALS) && ((obscure_syntax & RE_NO_BK_CURLY_BRACES) ? 
*p == '{' : (p[0] == '\\' && p[1] == '{')))) { laststart = b; BUFPUSH (exactn); pending_exact = b; BUFPUSH (0); } BUFPUSH (c); (*pending_exact)++; } } if (fixup_jump) store_jump (fixup_jump, jump, b); if (stackp != stackb) goto unmatched_open; bufp->used = b - bufp->buffer; return 0; invalid_pattern: return "Invalid regular expression"; unmatched_open: return "Unmatched \\("; unmatched_close: return "Unmatched \\)"; end_of_pattern: return "Premature end of regular expression"; nesting_too_deep: return "Nesting too deep"; too_big: return "Regular expression too big"; memory_exhausted: return "Memory exhausted";}/* Store a jump of the form <OPCODE> <relative address>. Store in the location FROM a jump operation to jump to relative address FROM - TO. OPCODE is the opcode to store. */static voidstore_jump (char *from, char opcode, char *to){ from[0] = opcode; STORE_NUMBER(from + 1, to - (from + 3));}/* Open up space before char FROM, and insert there a jump to TO. CURRENT_END gives the end of the storage not in use, so we know how much data to copy up. OP is the opcode of the jump to insert. If you call this function, you must zero out pending_exact. */static voidinsert_jump (char op, char *from, char *to, char *current_end){ register char *pfrom = current_end; /* Copy from here... */ register char *pto = current_end + 3; /* ...to here. */ while (pfrom != from) *--pto = *--pfrom; store_jump (from, op, to);}/* Store a jump of the form <opcode> <relative address> <n> . Store in the location FROM a jump operation to jump to relative address FROM - TO. OPCODE is the opcode to store, N is a number the jump uses, say, to decide how many times to jump. If you call this function, you must zero out pending_exact. 
*/
static void
store_jump_n (char *from, char opcode, char *to, unsigned n)
{
  /* Opcode first, then the displacement of TO.  The displacement is
     TO - (FROM + 3), the same base store_jump uses, even though this
     form carries a further number after the displacement.  */
  from[0] = opcode;
  STORE_NUMBER (from + 1, to - (from + 3));

  /* The count N is stored right after the displacement.  STORE_NUMBER
     is defined outside this excerpt; presumably each number occupies
     two bytes, since callers treat this instruction as five bytes
     long -- confirm against the macro.  */
  STORE_NUMBER (from + 3, n);
}

/* Similar to insert_jump, but handles a jump which needs an extra number to handle minimum and maximum cases. Open up space at location FROM, and insert there a jump to TO. CURRENT_END gives the end of the storage in use, so we know how much data to copy up. OP is the opcode of the jump to insert. If you call this function, you must zero out pending_exact. */static voidinsert_jump_n (char op, char *from, char *to, char *current_end, unsigned n){ register char *pfrom = current_end; /* Copy from here... */ register char *pto = current_end + 5; /* ...to here. */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -