📄 regex.c
字号:
Requires variables fail_stack, regstart, regend, reg_info, and num_regs be declared. GROW_FAIL_STACK requires `destination' be declared. Does `return FAILURE_CODE' if runs out of memory. */#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \ do { \ char *destination; \ /* Must be int, so when we don't save any registers, the arithmetic \ of 0 + -1 isn't done as unsigned. */ \ int this_reg; \ \ DEBUG_STATEMENT (failure_id++); \ DEBUG_STATEMENT (nfailure_points_pushed++); \ DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ \ DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \ DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \ \ /* Ensure we have enough space allocated for what we will push. */ \ while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \ { \ if (!GROW_FAIL_STACK (fail_stack)) \ return failure_code; \ \ DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \ (fail_stack).size); \ DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\ } \ \ /* Push the info, starting with the registers. */ \ DEBUG_PRINT1 ("\n"); \ \ if (1) \ for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \ this_reg++) \ { \ DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \ DEBUG_STATEMENT (num_regs_pushed++); \ \ DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ PUSH_FAILURE_POINTER (regstart[this_reg]); \ \ DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ PUSH_FAILURE_POINTER (regend[this_reg]); \ \ DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \ DEBUG_PRINT2 (" match_null=%d", \ REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \ DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \ DEBUG_PRINT2 (" matched_something=%d", \ MATCHED_SOMETHING (reg_info[this_reg])); \ DEBUG_PRINT2 (" ever_matched=%d", \ EVER_MATCHED_SOMETHING (reg_info[this_reg])); \ DEBUG_PRINT1 ("\n"); \ PUSH_FAILURE_ELT (reg_info[this_reg].word); \ } \ \ DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\ PUSH_FAILURE_INT (lowest_active_reg); \ \ DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\ PUSH_FAILURE_INT (highest_active_reg); \ \ DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \ DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ PUSH_FAILURE_POINTER (pattern_place); \ \ DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \ DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ size2); \ DEBUG_PRINT1 ("'\n"); \ PUSH_FAILURE_POINTER (string_place); \ \ DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \ DEBUG_PUSH (failure_id); \ } while (0)/* This is the number of items that are pushed and popped on the stack for each register. */#define NUM_REG_ITEMS 3/* Individual items aside from the registers. */#ifdef DEBUG#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */#else#define NUM_NONREG_ITEMS 4#endif/* Estimate the size of data pushed by a typical failure stack entry. An estimate is all we need, because all we use this for is to choose a limit for how big to make the failure stack. */#define TYPICAL_FAILURE_SIZE 20/* This is how many items we actually use for a failure point. It depends on the regexp. */#define NUM_FAILURE_ITEMS \ (((0 \ ? 0 : highest_active_reg - lowest_active_reg + 1) \ * NUM_REG_ITEMS) \ + NUM_NONREG_ITEMS)/* How many items can still be added to the stack without overflowing it. */#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)/* Pops what PUSH_FAIL_STACK pushes. We restore into the parameters, all of which should be lvalues: STR -- the saved data position. PAT -- the saved pattern position. LOW_REG, HIGH_REG -- the highest and lowest active registers. REGSTART, REGEND -- arrays of string positions. REG_INFO -- array of information about each subexpression. Also assumes the variables `fail_stack' and (if debugging), `bufp', `pend', `string1', `size1', `string2', and `size2'. */#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\{ \ DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \ int this_reg; \ const unsigned char *string_temp; \ \ assert (!FAIL_STACK_EMPTY ()); \ \ /* Remove failure points and point to how many regs pushed. */ \ DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \ DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \ DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \ \ assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ \ DEBUG_POP (&failure_id); \ DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \ \ /* If the saved string location is NULL, it came from an \ on_failure_keep_string_jump opcode, and we want to throw away the \ saved NULL, thus retaining our current position in the string. */ \ string_temp = POP_FAILURE_POINTER (); \ if (string_temp != NULL) \ str = (const char *) string_temp; \ \ DEBUG_PRINT2 (" Popping string 0x%x: `", str); \ DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ DEBUG_PRINT1 ("'\n"); \ \ pat = (unsigned char *) POP_FAILURE_POINTER (); \ DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \ DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ \ /* Restore register info. */ \ high_reg = (unsigned) POP_FAILURE_INT (); \ DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \ \ low_reg = (unsigned) POP_FAILURE_INT (); \ DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \ \ if (1) \ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ { \ DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \ \ reg_info[this_reg].word = POP_FAILURE_ELT (); \ DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \ \ regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \ DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ \ regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \ DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ } \ else \ { \ for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \ { \ reg_info[this_reg].word.integer = 0; \ regend[this_reg] = 0; \ regstart[this_reg] = 0; \ } \ highest_active_reg = high_reg; \ } \ \ set_regs_matched_done = 0; \ DEBUG_STATEMENT (nfailure_points_popped++); \} /* POP_FAILURE_POINT *//* Structure for per-register (a.k.a. per-group) information. Other register information, such as the starting and ending positions (which are addresses), and the list of inner groups (which is a bits list) are maintained in separate variables. We are making a (strictly speaking) nonportable assumption here: that the compiler will pack our bit fields into something that fits into the type of `word', i.e., is something that fits into one item on the failure stack. */typedef union{ fail_stack_elt_t word; struct { /* This field is one if this group can match the empty string, zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */#define MATCH_NULL_UNSET_VALUE 3 unsigned match_null_string_p : 2; unsigned is_active : 1; unsigned matched_something : 1; unsigned ever_matched_something : 1; } bits;} register_info_type;#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)#define IS_ACTIVE(R) ((R).bits.is_active)#define MATCHED_SOMETHING(R) ((R).bits.matched_something)#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)/* Call this when have matched a real character; it sets `matched' flags for the subexpressions which we are currently inside. Also records that those subexprs have matched. */#define SET_REGS_MATCHED() \ do \ { \ if (!set_regs_matched_done) \ { \ unsigned r; \ set_regs_matched_done = 1; \ for (r = lowest_active_reg; r <= highest_active_reg; r++) \ { \ MATCHED_SOMETHING (reg_info[r]) \ = EVER_MATCHED_SOMETHING (reg_info[r]) \ = 1; \ } \ } \ } \ while (0)/* Registers are set to a sentinel when they haven't yet matched. */static char reg_unset_dummy;#define REG_UNSET_VALUE (®_unset_dummy)#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)/* Subroutine declarations and macros for regex_compile. */static void store_op1 (), store_op2 ();static void insert_op1 (), insert_op2 ();static boolean at_begline_loc_p (), at_endline_loc_p ();static boolean group_in_compile_stack ();static reg_errcode_t compile_range ();/* Fetch the next character in the uncompiled pattern---translating it if necessary. Also cast from a signed character in the constant string passed to us by the user to an unsigned char that we can use as an array index (in, e.g., `translate'). */#ifndef PATFETCH#define PATFETCH(c) \ do {if (p == pend) return REG_EEND; \ c = (unsigned char) *p++; \ if (RE_TRANSLATE_P (translate)) c = RE_TRANSLATE (translate, c); \ } while (0)#endif/* Fetch the next character in the uncompiled pattern, with no translation. */#define PATFETCH_RAW(c) \ do {if (p == pend) return REG_EEND; \ c = (unsigned char) *p++; \ } while (0)/* Go backwards one character in the pattern. */#define PATUNFETCH p--/* If `translate' is non-null, return translate[D], else just D. We cast the subscript to translate because some data is declared as `char *', to avoid warnings when a string constant is passed. But when we use a character as a subscript we must make it unsigned. */#ifndef TRANSLATE#define TRANSLATE(d) \ (RE_TRANSLATE_P (translate) \ ? (unsigned) RE_TRANSLATE (translate, (unsigned) (d)) : (d))#endif/* Macros for outputting the compiled pattern into `buffer'. *//* If the buffer isn't allocated when it comes in, use this. */#define INIT_BUF_SIZE 32/* Make sure we have at least N more bytes of space in buffer. */#define GET_BUFFER_SPACE(n) \ while (b - bufp->buffer + (n) > bufp->allocated) \ EXTEND_BUFFER ()/* Make sure we have one more byte of buffer space and then add C to it. */#define BUF_PUSH(c) \ do { \ GET_BUFFER_SPACE (1); \ *b++ = (unsigned char) (c); \ } while (0)/* Ensure we have two more bytes of buffer space and then append C1 and C2. */#define BUF_PUSH_2(c1, c2) \ do { \ GET_BUFFER_SPACE (2); \ *b++ = (unsigned char) (c1); \ *b++ = (unsigned char) (c2); \ } while (0)/* As with BUF_PUSH_2, except for three bytes. */#define BUF_PUSH_3(c1, c2, c3) \ do { \ GET_BUFFER_SPACE (3); \ *b++ = (unsigned char) (c1); \ *b++ = (unsigned char) (c2); \ *b++ = (unsigned char) (c3); \ } while (0)/* Store a jump with opcode OP at LOC to location TO. We store a relative address offset by the three bytes the jump itself occupies. */#define STORE_JUMP(op, loc, to) \ store_op1 (op, loc, (to) - (loc) - 3)/* Likewise, for a two-argument jump. */#define STORE_JUMP2(op, loc, to, arg) \ store_op2 (op, loc, (to) - (loc) - 3, arg)/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */#define INSERT_JUMP(op, loc, to) \ insert_op1 (op, loc, (to) - (loc) - 3, b)/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */#define INSERT_JUMP2(op, loc, to, arg) \ insert_op2 (op, loc, (to) - (loc) - 3, arg, b)/* This is not an arbitrary limit: the arguments which represent offsets into the pattern are two bytes long. So if 2^16 bytes turns out to be too small, many things would have to change. */#define MAX_BUF_SIZE (1L << 16)/* Extend the buffer by twice its current size via realloc and reset the pointers that pointed into the old block to point to the correct places in the new one. If extending the buffer results in it being larger than MAX_BUF_SIZE, then flag memory exhausted. */#define EXTEND_BUFFER() \ do { \ unsigned char *old_buffer = bufp->buffer; \ if (bufp->allocated == MAX_BUF_SIZE) \ return REG_ESIZE; \ bufp->allocated <<= 1; \ if (bufp->allocated > MAX_BUF_SIZE) \ bufp->allocated = MAX_BUF_SIZE; \ bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\ if (bufp->buffer == NULL) \ return REG_ESPACE; \ /* If the buffer moved, move all the pointers into it. */ \ if (old_buffer != bufp->buffer) \ { \ b = (b - old_buffer) + bufp->buffer; \ begalt = (begalt - old_buffer) + bufp->buffer; \ if (fixup_alt_jump) \ fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\ if (laststart) \ laststart = (laststart - old_buffer) + bufp->buffer; \ if (pending_exact) \ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ } \ } while (0)/* Since we have one byte reserved for the register number argument to {start,stop}_memory, the maximum number of groups we can report things about is what fits in that byte. */#define MAX_REGNUM 255/* But patterns can have more than `MAX_REGNUM' registers. We just ignore the excess. */typedef unsigned regnum_t;/* Macros for the compile stack. *//* Since offsets can go either forwards or backwards, this type needs to be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */typedef int pattern_offset_t;typedef struct{ pattern_offset_t begalt_offset; pattern_offset_t fixup_alt_jump; pattern_offset_t inner_group_offset; pattern_offset_t laststart_offset; regnum_t regnum;} compile_stack_elt_t;typedef struct{ compile_stack_elt_t *stack; unsigned size; unsigned avail; /* Offset of next open position. */} compile_stack_type;#define INIT_COMPILE_STACK_SIZE 32#define COMPILE_STACK_EMPTY (compile_stack.avail == 0)#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)/* The next available element. */#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -