📄 regex.c
字号:
/* Push pointer POINTER on FAIL_STACK. Return 1 if was able to do so and 0 if ran out of memory allocating space to do so. */#define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \ ((FAIL_STACK_FULL () \ && !DOUBLE_FAIL_STACK (FAIL_STACK)) \ ? 0 \ : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \ 1))/* Push a pointer value onto the failure stack. Assumes the variable `fail_stack'. Probably should only be called from within `PUSH_FAILURE_POINT'. */#define PUSH_FAILURE_POINTER(item) \ fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)/* This pushes an integer-valued item onto the failure stack. Assumes the variable `fail_stack'. Probably should only be called from within `PUSH_FAILURE_POINT'. */#define PUSH_FAILURE_INT(item) \ fail_stack.stack[fail_stack.avail++].integer = (item)/* Push a fail_stack_elt_t value onto the failure stack. Assumes the variable `fail_stack'. Probably should only be called from within `PUSH_FAILURE_POINT'. */#define PUSH_FAILURE_ELT(item) \ fail_stack.stack[fail_stack.avail++] = (item)/* These three POP... operations complement the three PUSH... operations. All assume that `fail_stack' is nonempty. */#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]/* Used to omit pushing failure point id's when we're not debugging. */#ifdef DEBUG# define DEBUG_PUSH PUSH_FAILURE_INT# define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()#else# define DEBUG_PUSH(item)# define DEBUG_POP(item_addr)#endif/* Push the information about the state we will need if we ever fail back to it. Requires variables fail_stack, regstart, regend, reg_info, and num_regs_pushed be declared. DOUBLE_FAIL_STACK requires `destination' be declared. Does `return FAILURE_CODE' if runs out of memory. */#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \ do { \ char *destination; \ /* Must be int, so when we don't save any registers, the arithmetic \ of 0 + -1 isn't done as unsigned. */ \ /* Can't be int, since there is not a shred of a guarantee that int \ is wide enough to hold a value of something to which pointer can \ be assigned */ \ active_reg_t this_reg; \ \ DEBUG_STATEMENT (failure_id++); \ DEBUG_STATEMENT (nfailure_points_pushed++); \ DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ \ DEBUG_PRINT2 (" slots needed: %ld\n", NUM_FAILURE_ITEMS); \ DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \ \ /* Ensure we have enough space allocated for what we will push. */ \ while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \ { \ if (!DOUBLE_FAIL_STACK (fail_stack)) \ return failure_code; \ \ DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \ (fail_stack).size); \ DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\ } \ \ /* Push the info, starting with the registers. */ \ DEBUG_PRINT1 ("\n"); \ \ if (1) \ for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \ this_reg++) \ { \ DEBUG_PRINT2 (" Pushing reg: %lu\n", this_reg); \ DEBUG_STATEMENT (num_regs_pushed++); \ \ DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \ PUSH_FAILURE_POINTER (regstart[this_reg]); \ \ DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \ PUSH_FAILURE_POINTER (regend[this_reg]); \ \ DEBUG_PRINT2 (" info: %p\n ", \ reg_info[this_reg].word.pointer); \ DEBUG_PRINT2 (" match_null=%d", \ REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \ DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \ DEBUG_PRINT2 (" matched_something=%d", \ MATCHED_SOMETHING (reg_info[this_reg])); \ DEBUG_PRINT2 (" ever_matched=%d", \ EVER_MATCHED_SOMETHING (reg_info[this_reg])); \ DEBUG_PRINT1 ("\n"); \ PUSH_FAILURE_ELT (reg_info[this_reg].word); \ } \ \ DEBUG_PRINT2 (" Pushing low active reg: %ld\n", lowest_active_reg);\ PUSH_FAILURE_INT (lowest_active_reg); \ \ DEBUG_PRINT2 (" Pushing high active reg: %ld\n", highest_active_reg);\ PUSH_FAILURE_INT (highest_active_reg); \ \ DEBUG_PRINT2 (" Pushing pattern %p:\n", pattern_place); \ DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ PUSH_FAILURE_POINTER (pattern_place); \ \ DEBUG_PRINT2 (" Pushing string %p: `", string_place); \ DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ size2); \ DEBUG_PRINT1 ("'\n"); \ PUSH_FAILURE_POINTER (string_place); \ \ DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \ DEBUG_PUSH (failure_id); \ } while (0)/* This is the number of items that are pushed and popped on the stack for each register. */#define NUM_REG_ITEMS 3/* Individual items aside from the registers. */#ifdef DEBUG# define NUM_NONREG_ITEMS 5 /* Includes failure point id. */#else# define NUM_NONREG_ITEMS 4#endif/* We push at most this many items on the stack. *//* We used to use (num_regs - 1), which is the number of registers this regexp will save; but that was changed to 5 to avoid stack overflow for a regexp with lots of parens. */#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)/* We actually push this many items. */#define NUM_FAILURE_ITEMS \ (((0 \ ? 0 : highest_active_reg - lowest_active_reg + 1) \ * NUM_REG_ITEMS) \ + NUM_NONREG_ITEMS)/* How many items can still be added to the stack without overflowing it. */#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)/* Pops what PUSH_FAIL_STACK pushes. We restore into the parameters, all of which should be lvalues: STR -- the saved data position. PAT -- the saved pattern position. LOW_REG, HIGH_REG -- the highest and lowest active registers. REGSTART, REGEND -- arrays of string positions. REG_INFO -- array of information about each subexpression. Also assumes the variables `fail_stack' and (if debugging), `bufp', `pend', `string1', `size1', `string2', and `size2'. */#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\{ \ DEBUG_STATEMENT (unsigned failure_id;) \ active_reg_t this_reg; \ const unsigned char *string_temp; \ \ assert (!FAIL_STACK_EMPTY ()); \ \ /* Remove failure points and point to how many regs pushed. */ \ DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \ DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \ DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \ \ assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ \ DEBUG_POP (&failure_id); \ DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \ \ /* If the saved string location is NULL, it came from an \ on_failure_keep_string_jump opcode, and we want to throw away the \ saved NULL, thus retaining our current position in the string. */ \ string_temp = POP_FAILURE_POINTER (); \ if (string_temp != NULL) \ str = (const char *) string_temp; \ \ DEBUG_PRINT2 (" Popping string %p: `", str); \ DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ DEBUG_PRINT1 ("'\n"); \ \ pat = (unsigned char *) POP_FAILURE_POINTER (); \ DEBUG_PRINT2 (" Popping pattern %p:\n", pat); \ DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ \ /* Restore register info. */ \ high_reg = (active_reg_t) POP_FAILURE_INT (); \ DEBUG_PRINT2 (" Popping high active reg: %ld\n", high_reg); \ \ low_reg = (active_reg_t) POP_FAILURE_INT (); \ DEBUG_PRINT2 (" Popping low active reg: %ld\n", low_reg); \ \ if (1) \ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ { \ DEBUG_PRINT2 (" Popping reg: %ld\n", this_reg); \ \ reg_info[this_reg].word = POP_FAILURE_ELT (); \ DEBUG_PRINT2 (" info: %p\n", \ reg_info[this_reg].word.pointer); \ \ regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \ DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \ \ regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \ DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \ } \ else \ { \ for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \ { \ reg_info[this_reg].word.integer = 0; \ regend[this_reg] = 0; \ regstart[this_reg] = 0; \ } \ highest_active_reg = high_reg; \ } \ \ set_regs_matched_done = 0; \ DEBUG_STATEMENT (nfailure_points_popped++); \} /* POP_FAILURE_POINT *//* Structure for per-register (a.k.a. per-group) information. Other register information, such as the starting and ending positions (which are addresses), and the list of inner groups (which is a bits list) are maintained in separate variables. We are making a (strictly speaking) nonportable assumption here: that the compiler will pack our bit fields into something that fits into the type of `word', i.e., is something that fits into one item on the failure stack. *//* Declarations and macros for re_match_2. */typedef union{ fail_stack_elt_t word; struct { /* This field is one if this group can match the empty string, zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */#define MATCH_NULL_UNSET_VALUE 3 unsigned match_null_string_p : 2; unsigned is_active : 1; unsigned matched_something : 1; unsigned ever_matched_something : 1; } bits;} register_info_type;#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)#define IS_ACTIVE(R) ((R).bits.is_active)#define MATCHED_SOMETHING(R) ((R).bits.matched_something)#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)/* Call this when have matched a real character; it sets `matched' flags for the subexpressions which we are currently inside. Also records that those subexprs have matched. */#define SET_REGS_MATCHED() \ do \ { \ if (!set_regs_matched_done) \ { \ active_reg_t r; \ set_regs_matched_done = 1; \ for (r = lowest_active_reg; r <= highest_active_reg; r++) \ { \ MATCHED_SOMETHING (reg_info[r]) \ = EVER_MATCHED_SOMETHING (reg_info[r]) \ = 1; \ } \ } \ } \ while (0)/* Registers are set to a sentinel when they haven't yet matched. */static char reg_unset_dummy;#define REG_UNSET_VALUE (®_unset_dummy)#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)/* Subroutine declarations and macros for regex_compile. */static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size, reg_syntax_t syntax, struct re_pattern_buffer *bufp));static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg1, int arg2));static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg, unsigned char *end));static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg1, int arg2, unsigned char *end));static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p, reg_syntax_t syntax));static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend, reg_syntax_t syntax));static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr, const char *pend, char *translate, reg_syntax_t syntax, unsigned char *b));/* Fetch the next character in the uncompiled pattern---translating it if necessary. Also cast from a signed character in the constant string passed to us by the user to an unsigned char that we can use as an array index (in, e.g., `translate'). */#ifndef PATFETCH# define PATFETCH(c) \ do {if (p == pend) return REG_EEND; \ c = (unsigned char) *p++; \ if (translate) c = (unsigned char) translate[c]; \ } while (0)#endif/* Fetch the next character in the uncompiled pattern, with no translation. */#define PATFETCH_RAW(c) \ do {if (p == pend) return REG_EEND; \ c = (unsigned char) *p++; \ } while (0)/* Go backwards one character in the pattern. */#define PATUNFETCH p--/* If `translate' is non-null, return translate[D], else just D. We cast the subscript to translate because some data is declared as `char *', to avoid warnings when a string constant is passed. But when we use a character as a subscript we must make it unsigned. */#ifndef TRANSLATE# define TRANSLATE(d) \ (translate ? (char) translate[(unsigned char) (d)] : (d))#endif/* Macros for outputting the compiled pattern into `buffer'. *//* If the buffer isn't allocated when it comes in, use this. */#define INIT_BUF_SIZE 32/* Make sure we have at least N more bytes of space in buffer. */#define GET_BUFFER_SPACE(n) \ while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \ EXTEND_BUFFER ()/* Make sure we have one more byte of buffer space and then add C to it. */#define BUF_PUSH(c) \ do { \ GET_BUFFER_SPACE (1); \ *b++ = (unsigned char) (c); \ } while (0)/* Ensure we have two more bytes of buffer space and then append C1 and C2. */#define BUF_PUSH_2(c1, c2) \ do { \ GET_BUFFER_SPACE (2); \ *b++ = (unsigned char) (c1); \ *b++ = (unsigned char) (c2); \ } while (0)/* As with BUF_PUSH_2, except for three bytes. */#define BUF_PUSH_3(c1, c2, c3) \ do { \ GET_BUFFER_SPACE (3); \ *b++ = (unsigned char) (c1); \ *b++ = (unsigned char) (c2); \ *b++ = (unsigned char) (c3); \ } while (0)/* Store a jump with opcode OP at LOC to location TO. We store a relative address offset by the three bytes the jump itself occupies. */#define STORE_JUMP(op, loc, to) \ store_op1 (op, loc, (int) ((to) - (loc) - 3))/* Likewise, for a two-argument jump. */#define STORE_JUMP2(op, loc, to, arg) \ store_op2 (op, loc, (int) ((to) - (loc) - 3), arg)/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */#define INSERT_JUMP(op, loc, to) \ insert_op1 (op, loc, (int) ((to) - (loc) - 3), b)/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */#define INSERT_JUMP2(op, loc, to, arg) \ insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -