📄 gnuregex.c
字号:
case '=': BUF_PUSH(at_dot); break; case 's': laststart = b; PATFETCH(c); BUF_PUSH_2(syntaxspec, syntax_spec_code[c]); break; case 'S': laststart = b; PATFETCH(c); BUF_PUSH_2(notsyntaxspec, syntax_spec_code[c]); break;#endif /* emacs */ case 'w': laststart = b; BUF_PUSH(wordchar); break; case 'W': laststart = b; BUF_PUSH(notwordchar); break; case '<': BUF_PUSH(wordbeg); break; case '>': BUF_PUSH(wordend); break; case 'b': BUF_PUSH(wordbound); break; case 'B': BUF_PUSH(notwordbound); break; case '`': BUF_PUSH(begbuf); break; case '\'': BUF_PUSH(endbuf); break; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (syntax & RE_NO_BK_REFS) goto normal_char; c1 = c - '0'; if (c1 > regnum) return REG_ESUBREG; /* Can't back reference to a subexpression if inside of it. */ if (group_in_compile_stack(compile_stack, c1)) goto normal_char; laststart = b; BUF_PUSH_2(duplicate, c1); break; case '+': case '?': if (syntax & RE_BK_PLUS_QM) goto handle_plus; else goto normal_backslash; default: normal_backslash: /* You might think it would be useful for \ to mean * not to translate; but if we don't translate it * it will never match anything. */ c = TRANSLATE(c); goto normal_char; } break; default: /* Expects the character in `c'. */ normal_char: /* If no exactn currently being built. */ if (!pending_exact /* If last exactn not at current position. */ || pending_exact + *pending_exact + 1 != b /* We have only one byte following the exactn for the count. */ || *pending_exact == (1 << BYTEWIDTH) - 1 /* If followed by a repetition operator. */ || *p == '*' || *p == '^' || ((syntax & RE_BK_PLUS_QM) ? *p == '\\' && (p[1] == '+' || p[1] == '?') : (*p == '+' || *p == '?')) || ((syntax & RE_INTERVALS) && ((syntax & RE_NO_BK_BRACES) ? *p == '{' : (p[0] == '\\' && p[1] == '{')))) { /* Start building a new exactn. */ laststart = b; BUF_PUSH_2(exactn, 0); pending_exact = b - 1; } BUF_PUSH(c); (*pending_exact)++; break; } /* switch (c) */ } /* while p != pend */ /* Through the pattern now. */ if (fixup_alt_jump) STORE_JUMP(jump_past_alt, fixup_alt_jump, b); if (!COMPILE_STACK_EMPTY) return REG_EPAREN; free(compile_stack.stack); /* We have succeeded; set the length of the buffer. */ bufp->used = b - bufp->buffer;#ifdef DEBUG if (debug) { DEBUG_PRINT1("\nCompiled pattern: "); print_compiled_pattern(bufp); }#endif /* DEBUG */ return REG_NOERROR;} /* regex_compile *//* Subroutines for `regex_compile'. *//* Store OP at LOC followed by two-byte integer parameter ARG. */static voidstore_op1(op, loc, arg) re_opcode_t op; unsigned char *loc; int arg;{ *loc = (unsigned char) op; STORE_NUMBER(loc + 1, arg);}/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */static voidstore_op2(op, loc, arg1, arg2) re_opcode_t op; unsigned char *loc; int arg1, arg2;{ *loc = (unsigned char) op; STORE_NUMBER(loc + 1, arg1); STORE_NUMBER(loc + 3, arg2);}/* Copy the bytes from LOC to END to open up three bytes of space at LOC * for OP followed by two-byte integer parameter ARG. */static voidinsert_op1(op, loc, arg, end) re_opcode_t op; unsigned char *loc; int arg; unsigned char *end;{ register unsigned char *pfrom = end; register unsigned char *pto = end + 3; while (pfrom != loc) *--pto = *--pfrom; store_op1(op, loc, arg);}/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */static voidinsert_op2(op, loc, arg1, arg2, end) re_opcode_t op; unsigned char *loc; int arg1, arg2; unsigned char *end;{ register unsigned char *pfrom = end; register unsigned char *pto = end + 5; while (pfrom != loc) *--pto = *--pfrom; store_op2(op, loc, arg1, arg2);}/* P points to just after a ^ in PATTERN. Return true if that ^ comes * after an alternative or a begin-subexpression. We assume there is at * least one character before the ^. */static booleanat_begline_loc_p(pattern, p, syntax) const char *pattern, *p; reg_syntax_t syntax;{ const char *prev = p - 2; boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; return /* After a subexpression? */ (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) /* After an alternative? */ || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));}/* The dual of at_begline_loc_p. This one is for $. We assume there is * at least one character after the $, i.e., `P < PEND'. */static booleanat_endline_loc_p(p, pend, syntax) const char *p, *pend; int syntax;{ const char *next = p; boolean next_backslash = *next == '\\'; const char *next_next = p + 1 < pend ? p + 1 : NULL; return /* Before a subexpression? */ (syntax & RE_NO_BK_PARENS ? *next == ')' : next_backslash && next_next && *next_next == ')') /* Before an alternative? */ || (syntax & RE_NO_BK_VBAR ? *next == '|' : next_backslash && next_next && *next_next == '|');}/* Returns true if REGNUM is in one of COMPILE_STACK's elements and * false if it's not. */static booleangroup_in_compile_stack(compile_stack, regnum) compile_stack_type compile_stack; regnum_t regnum;{ int this_element; for (this_element = compile_stack.avail - 1; this_element >= 0; this_element--) if (compile_stack.stack[this_element].regnum == regnum) return true; return false;}/* Read the ending character of a range (in a bracket expression) from the * uncompiled pattern *P_PTR (which ends at PEND). We assume the * starting character is in `P[-2]'. (`P[-1]' is the character `-'.) * Then we set the translation of all bits between the starting and * ending characters (inclusive) in the compiled pattern B. * * Return an error code. * * We use these short variable names so we can use the same macros as * `regex_compile' itself. */static reg_errcode_tcompile_range(p_ptr, pend, translate, syntax, b) const char **p_ptr, *pend; char *translate; reg_syntax_t syntax; unsigned char *b;{ unsigned this_char; const char *p = *p_ptr; int range_start, range_end; if (p == pend) return REG_ERANGE; /* Even though the pattern is a signed `char *', we need to fetch * with unsigned char *'s; if the high bit of the pattern character * is set, the range endpoints will be negative if we fetch using a * signed char *. * * We also want to fetch the endpoints without translating them; the * appropriate translation is done in the bit-setting loop below. */ range_start = ((unsigned char *) p)[-2]; range_end = ((unsigned char *) p)[0]; /* Have to increment the pointer into the pattern string, so the * caller isn't still at the ending character. */ (*p_ptr)++; /* If the start is after the end, the range is empty. */ if (range_start > range_end) return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; /* Here we see why `this_char' has to be larger than an `unsigned * char' -- the range is inclusive, so if `range_end' == 0xff * (assuming 8-bit characters), we would otherwise go into an infinite * loop, since all characters <= 0xff. */ for (this_char = range_start; this_char <= range_end; this_char++) { SET_LIST_BIT(TRANSLATE(this_char)); } return REG_NOERROR;}/* Failure stack declarations and macros; both re_compile_fastmap and * re_match_2 use a failure stack. These have to be macros because of * REGEX_ALLOCATE. *//* Number of failure points for which to initially allocate space * when matching. If this number is exceeded, we allocate more * space, so it is not a hard limit. */#ifndef INIT_FAILURE_ALLOC#define INIT_FAILURE_ALLOC 5#endif/* Roughly the maximum number of failure points on the stack. Would be * exactly that if always used MAX_FAILURE_SPACE each time we failed. * This is a variable only so users of regex can assign to it; we never * change it ourselves. */int re_max_failures = 2000;typedef const unsigned char *fail_stack_elt_t;typedef struct { fail_stack_elt_t *stack; unsigned size; unsigned avail; /* Offset of next open position. */} fail_stack_type;#define FAIL_STACK_EMPTY() (fail_stack.avail == 0)#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)#define FAIL_STACK_TOP() (fail_stack.stack[fail_stack.avail])/* Initialize `fail_stack'. Do `return -2' if the alloc fails. */#define INIT_FAIL_STACK() \ do { \ fail_stack.stack = (fail_stack_elt_t *) \ REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \ \ if (fail_stack.stack == NULL) \ return -2; \ \ fail_stack.size = INIT_FAILURE_ALLOC; \ fail_stack.avail = 0; \ } while (0)/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. * * Return 1 if succeeds, and 0 if either ran out of memory * allocating space for it or it was already too large. * * REGEX_REALLOCATE requires `destination' be declared. */#define DOUBLE_FAIL_STACK(fail_stack) \ ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \ ? 0 \ : ((fail_stack).stack = (fail_stack_elt_t *) \ REGEX_REALLOCATE ((fail_stack).stack, \ (fail_stack).size * sizeof (fail_stack_elt_t), \ ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \ \ (fail_stack).stack == NULL \ ? 0 \ : ((fail_stack).size <<= 1, \ 1)))/* Push PATTERN_OP on FAIL_STACK. * * Return 1 if was able to do so and 0 if ran out of memory allocating * space to do so. */#define PUSH_PATTERN_OP(pattern_op, fail_stack) \ ((FAIL_STACK_FULL () \ && !DOUBLE_FAIL_STACK (fail_stack)) \ ? 0 \ : ((fail_stack).stack[(fail_stack).avail++] = pattern_op, \ 1))/* This pushes an item onto the failure stack. Must be a four-byte * value. Assumes the variable `fail_stack'. Probably should only * be called from within `PUSH_FAILURE_POINT'. */#define PUSH_FAILURE_ITEM(item) \ fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item/* The complement operation. Assumes `fail_stack' is nonempty. */#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]/* Used to omit pushing failure point id's when we're not debugging. */#ifdef DEBUG#define DEBUG_PUSH PUSH_FAILURE_ITEM#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()#else#define DEBUG_PUSH(item)#define DEBUG_POP(item_addr)#endif/* Push the information about the state we will need * if we ever fail back to it. * * Requires variables fail_stack, regstart, regend, reg_info, and * num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be * declared. * * Does `return FAILURE_CODE' if runs out of memory. */#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \ do { \ char *destination; \ /* Must be int, so when we don't save any registers, the arithmetic \ of 0 + -1 isn't done as unsigned. */ \ int this_reg; \ \ DEBUG_STATEMENT (failure_id++); \ DEBUG_STATEMENT (nfailure_points_pushed++); \ DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ DEBUG_PRINT2 (" Bef
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -