📄 x_regex.c
字号:
b += 5; /* The location we want to set is the second parameter of the `jump_n'; that is `b-2' as an absolute address. `laststart' will be the `set_number_at' we're about to insert; `laststart+3' the number to set, the source for the relative address. But we are inserting into the middle of the pattern -- so everything is getting moved up by 5. Conclusion: (b - 2) - (laststart + 3) + 5, i.e., b - laststart. We insert this at the beginning of the loop so that if we fail during matching, we'll reinitialize the bounds. */ insert_op2 (set_number_at, laststart, b - laststart, upper_bound - 1, b); b += 5; } } pending_exact = 0; beg_interval = NULL; } break; unfetch_interval: /* If an invalid interval, match the characters as literals. */ assert (beg_interval); p = beg_interval; beg_interval = NULL; /* normal_char and normal_backslash need `c'. */ PATFETCH (c); if (!(syntax & RE_NO_BK_BRACES)) { if (p > pattern && p[-1] == '\\') goto normal_backslash; } goto normal_char;#ifdef emacs /* There is no way to specify the before_dot and after_dot operators. rms says this is ok. --karl */ case '=': BUF_PUSH (at_dot); break; case 's': laststart = b; PATFETCH (c); BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); break; case 'S': laststart = b; PATFETCH (c); BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); break;#endif /* emacs */ case 'w': laststart = b; BUF_PUSH (wordchar); break; case 'W': laststart = b; BUF_PUSH (notwordchar); break; case '<': BUF_PUSH (wordbeg); break; case '>': BUF_PUSH (wordend); break; case 'b': BUF_PUSH (wordbound); break; case 'B': BUF_PUSH (notwordbound); break; case '`': BUF_PUSH (begbuf); break; case '\'': BUF_PUSH (endbuf); break; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (syntax & RE_NO_BK_REFS) goto normal_char; c1 = c - '0'; if (c1 > regnum) return REG_ESUBREG; /* Can't back reference to a subexpression if inside of it. */ if (group_in_compile_stack (compile_stack, (regnum_t) c1)) goto normal_char; laststart = b; BUF_PUSH_2 (duplicate, c1); break; case '+': case '?': if (syntax & RE_BK_PLUS_QM) goto handle_plus; else goto normal_backslash; default: normal_backslash: /* You might think it would be useful for \ to mean not to translate; but if we don't translate it it will never match anything. */ c = TRANSLATE (c); goto normal_char; } break; default: /* Expects the character in `c'. */ normal_char: /* If no exactn currently being built. */ if (!pending_exact /* If last exactn not at current position. */ || pending_exact + *pending_exact + 1 != b /* We have only one byte following the exactn for the count. */ || *pending_exact == (UChar)(1 << (BYTEWIDTH * sizeof(UChar) - 1)) /* If followed by a repetition operator. */ || *p == '*' || *p == '^' || ((syntax & RE_BK_PLUS_QM) ? *p == '\\' && (p[1] == '+' || p[1] == '?') : (*p == '+' || *p == '?')) || ((syntax & RE_INTERVALS) && ((syntax & RE_NO_BK_BRACES) ? *p == '{' : (p[0] == '\\' && p[1] == '{')))) { /* Start building a new exactn. */ laststart = b; BUF_PUSH_2 (exactn, 0); pending_exact = b - 1; } BUF_PUSH (c); (*pending_exact)++; break; } /* switch (c) */ } /* while p != pend */ /* Through the pattern now. */ if (fixup_alt_jump) STORE_JUMP (jump_past_alt, fixup_alt_jump, b); if (!COMPILE_STACK_EMPTY) return REG_EPAREN; free (compile_stack.stack); /* We have succeeded; set the length of the buffer. */ bufp->used = b - bufp->buffer;#ifdef DEBUG if (debug) { DEBUG_PRINT1 ("\nCompiled pattern: "); print_compiled_pattern (bufp); }#endif /* DEBUG */ return REG_NOERROR;} /* regex_compile *//* Subroutines for `regex_compile'. *//* Store OP at LOC followed by two-byte integer parameter ARG. */static voidstore_op1 (op, loc, arg) re_opcode_t op; UChar *loc; Int32 arg;{ *loc = (UChar) op; STORE_NUMBER (loc + 1, arg);}/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */static voidstore_op2 (op, loc, arg1, arg2) re_opcode_t op; UChar *loc; Int32 arg1, arg2;{ *loc = (UChar) op; STORE_NUMBER (loc + 1, arg1); STORE_NUMBER (loc + 3, arg2);}/* Copy the bytes from LOC to END to open up three bytes of space at LOC for OP followed by two-byte integer parameter ARG. */static voidinsert_op1 (op, loc, arg, end) re_opcode_t op; UChar *loc; Int32 arg; UChar *end; { register UChar *pfrom = end; register UChar *pto = end + 3; while (pfrom != loc) *--pto = *--pfrom; store_op1 (op, loc, arg);}/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */static voidinsert_op2 (op, loc, arg1, arg2, end) re_opcode_t op; UChar *loc; Int32 arg1, arg2; UChar *end; { register UChar *pfrom = end; register UChar *pto = end + 5; while (pfrom != loc) *--pto = *--pfrom; store_op2 (op, loc, arg1, arg2);}/* P points to just after a ^ in PATTERN. Return true if that ^ comes after an alternative or a begin-subexpression. We assume there is at least one character before the ^. */static booleanat_begline_loc_p (pattern, p, syntax) UChar *pattern, *p; reg_syntax_t syntax;{ UChar *prev = p - 2; boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; return /* After a subexpression? */ (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) /* After an alternative? */ || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));}/* The dual of at_begline_loc_p. This one is for $. We assume there is at least one character after the $, i.e., `P < PEND'. */static booleanat_endline_loc_p (p, pend, syntax) UChar *p, *pend; Int32 syntax;{ UChar *next = p; boolean next_backslash = *next == '\\'; UChar *next_next = p + 1 < pend ? p + 1 : NULL; return /* Before a subexpression? */ (syntax & RE_NO_BK_PARENS ? *next == ')' : next_backslash && next_next && *next_next == ')') /* Before an alternative? */ || (syntax & RE_NO_BK_VBAR ? *next == '|' : next_backslash && next_next && *next_next == '|');}/* Returns true if REGNUM is in one of COMPILE_STACK's elements and false if it's not. */static booleangroup_in_compile_stack (compile_stack, regnum) compile_stack_type compile_stack; regnum_t regnum;{ Int32 this_element; for (this_element = compile_stack.avail - 1; this_element >= 0; this_element--) if (compile_stack.stack[this_element].regnum == regnum) return true; return false;}/* Read the ending character of a range (in a bracket expression) from the uncompiled pattern *P_PTR (which ends at PEND). We assume the starting character is in `P[-2]'. (`P[-1]' is the character `-'.) Then we set the translation of all bits between the starting and ending characters (inclusive) in the compiled pattern B. Return an error code. We use these short variable names so we can use the same macros as `regex_compile' itself. */static reg_errcode_tcompile_range (p_ptr, pend, translate, syntax, b) UChar **p_ptr, *pend; UChar *translate; reg_syntax_t syntax; UChar *b;{ Uns32 this_char; UChar *p = *p_ptr; Int32 range_start, range_end; if (p == pend) return REG_ERANGE; /* Even though the pattern is a signed `char *', we need to fetch with unsigned char *'s; if the high bit of the pattern character is set, the range endpoints will be negative if we fetch using a signed char *. We also want to fetch the endpoints without translating them; the appropriate translation is done in the bit-setting loop below. */ range_start = ((UChar *) p)[-2]; range_end = ((UChar *) p)[0]; /* Have to increment the pointer into the pattern string, so the caller isn't still at the ending character. */ (*p_ptr)++; /* If the start is after the end, the range is empty. */ if (range_start > range_end) return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; /* Here we see why `this_char' has to be larger than an `unsigned char' -- the range is inclusive, so if `range_end' == 0xff (assuming 8-bit characters), we would otherwise go into an infinite loop, since all characters <= 0xff. */ for (this_char = range_start; this_char <= range_end; this_char++) { SET_LIST_BIT (TRANSLATE (this_char)); } return REG_NOERROR;}/* Failure stack declarations and macros; both re_compile_fastmap and re_match_2 use a failure stack. These have to be macros because of REGEX_ALLOCATE. */ /* Number of failure points for which to initially allocate space when matching. If this number is exceeded, we allocate more space, so it is not a hard limit. */#ifndef INIT_FAILURE_ALLOC#define INIT_FAILURE_ALLOC 5#endif/* Roughly the maximum number of failure points on the stack. Would be exactly that if always used MAX_FAILURE_SPACE each time we failed. This is a variable only so users of regex can assign to it; we never change it ourselves. */Int32 re_max_failures = 2000;typedef UChar *fail_stack_elt_t;typedef struct{ fail_stack_elt_t *stack; Uns32 size; Uns32 avail; /* Offset of next open position. */} fail_stack_type;#define FAIL_STACK_EMPTY() (fail_stack.avail == 0)#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)#define FAIL_STACK_TOP() (fail_stack.stack[fail_stack.avail])/* Initialize `fail_stack'. Do `return -2' if the alloc fails. */#define INIT_FAIL_STACK() \ do { \ fail_stack.stack = (fail_stack_elt_t *) \ REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \ \ if (fail_stack.stack == NULL) \ return -2; \ \ fail_stack.size = INIT_FAILURE_ALLOC; \ fail_stack.avail = 0; \ } while (0)/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. Return 1 if succeeds, and 0 if either ran out of memory allocating space for it or it was already too large. REGEX_REALLOCATE requires `destination' be declared. */#define Real64_FAIL_STACK(fail_stack) \ ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \ ? 0 \ : ((fail_stack).stack = (fail_stack_elt_t *) \ REGEX_REALLOCATE ((fail_stack).stack, \ (fail_stack).size * sizeof (fail_stack_elt_t), \ ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \ \ (fail_stack).stack == NULL \ ? 0 \ : ((fail_stack).size <<= 1, \ 1)))/* Push PATTERN_OP on FAIL_STACK. Return 1 if was able to do so and 0 if ran out of memory allocating space to do so. */#define PUSH_PATTERN_OP(pattern_op, fail_stack) \ ((FAIL_STACK_FULL () \ && !Real64_FAIL_STACK (fail_stack)) \ ? 0 \ : ((fail_stack).stack[(fail_stack).avail++] = pattern_op, \ 1))/* This pushes an item onto the failure stack. Must be a four-byte value. Assumes the variable `fail_stack'. Probably should only be called from within `PUSH_FAILURE_POINT'. */#define PUSH_FAILURE_ITEM(item) \ fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item/* The complement operation. Assumes `fail_stack' is nonempty. */#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]/* Used to omit pushing failure point id's when we're not debugging. */#ifdef DEBUG#define DEBUG_PUSH PUSH_FAILURE_ITEM#define DEBUG_POP(
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -