📄 regex.c
字号:
/* This has no initializer because initialized variables in Emacs become read-only after dumping. */reg_syntax_t re_syntax_options;/* Specify the precise syntax of regexps for compilation. This provides for compatibility for various utilities which historically have different, incompatible syntaxes. The argument SYNTAX is a bit mask comprised of the various bits defined in regex.h. We return the old syntax. */reg_syntax_tre_set_syntax (syntax) reg_syntax_t syntax;{ reg_syntax_t ret = re_syntax_options; re_syntax_options = syntax;#ifdef DEBUG if (syntax & RE_DEBUG) debug = 1; else if (debug) /* was on but now is not */ debug = 0;#endif /* DEBUG */ return ret;}#ifdef _LIBCweak_alias (__re_set_syntax, re_set_syntax)#endif/* This table gives an error message for each of the error codes listed in regex.h. Obviously the order here has to be same as there. POSIX doesn't require that we do anything for REG_NOERROR, but why not be nice? */static const char re_error_msgid[] = {#define REG_NOERROR_IDX 0 gettext_noop ("Success") /* REG_NOERROR */ "\0"#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success") gettext_noop ("No match") /* REG_NOMATCH */ "\0"#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match") gettext_noop ("Invalid regular expression") /* REG_BADPAT */ "\0"#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression") gettext_noop ("Invalid collation character") /* REG_ECOLLATE */ "\0"#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character") gettext_noop ("Invalid character class name") /* REG_ECTYPE */ "\0"#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name") gettext_noop ("Trailing backslash") /* REG_EESCAPE */ "\0"#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash") gettext_noop ("Invalid back reference") /* REG_ESUBREG */ "\0"#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference") gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */ "\0"#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^") gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */ "\0"#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(") gettext_noop ("Unmatched \\{") /* REG_EBRACE */ "\0"#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{") gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */ "\0"#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}") gettext_noop ("Invalid range end") /* REG_ERANGE */ "\0"#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end") gettext_noop ("Memory exhausted") /* REG_ESPACE */ "\0"#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted") gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */ "\0"#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression") gettext_noop ("Premature end of regular expression") /* REG_EEND */ "\0"#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression") gettext_noop ("Regular expression too big") /* REG_ESIZE */ "\0"#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big") gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */ };static const size_t re_error_msgid_idx[] = { REG_NOERROR_IDX, REG_NOMATCH_IDX, REG_BADPAT_IDX, REG_ECOLLATE_IDX, REG_ECTYPE_IDX, REG_EESCAPE_IDX, REG_ESUBREG_IDX, REG_EBRACK_IDX, REG_EPAREN_IDX, REG_EBRACE_IDX, REG_BADBR_IDX, REG_ERANGE_IDX, REG_ESPACE_IDX, REG_BADRPT_IDX, REG_EEND_IDX, REG_ESIZE_IDX, REG_ERPAREN_IDX };/* Avoiding alloca during matching, to placate r_alloc. *//* Define MATCH_MAY_ALLOCATE unless we need to make sure that the searching and matching functions should not call alloca. On some systems, alloca is implemented in terms of malloc, and if we're using the relocating allocator routines, then malloc could cause a relocation, which might (if the strings being searched are in the ralloc heap) shift the data out from underneath the regexp routines. Here's another reason to avoid allocation: Emacs processes input from X in a signal handler; processing X input may call malloc; if input arrives while a matching routine is calling malloc, then we're scrod. But Emacs can't just block input while calling matching routines; then we don't notice interrupts when they come in. So, Emacs blocks input around all regexp calls except the matching calls, which it leaves unprotected, in the faith that they will not malloc. *//* Normally, this is fine. */#define MATCH_MAY_ALLOCATE/* When using GNU C, we are not REALLY using the C alloca, no matter what config.h may say. So don't take precautions for it. */#ifdef __GNUC__# undef C_ALLOCA#endif/* The match routines may not allocate if (1) they would do it with malloc and (2) it's not safe for them to use malloc. Note that if REL_ALLOC is defined, matching would not use malloc for the failure stack, but we would still use it for the register vectors; so REL_ALLOC should not affect this. */#if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs# undef MATCH_MAY_ALLOCATE#endif/* Failure stack declarations and macros; both re_compile_fastmap and re_match_2 use a failure stack. These have to be macros because of REGEX_ALLOCATE_STACK. *//* Number of failure points for which to initially allocate space when matching. If this number is exceeded, we allocate more space, so it is not a hard limit. */#ifndef INIT_FAILURE_ALLOC# define INIT_FAILURE_ALLOC 5#endif/* Roughly the maximum number of failure points on the stack. Would be exactly that if always used MAX_FAILURE_ITEMS items each time we failed. This is a variable only so users of regex can assign to it; we never change it ourselves. */#ifdef INT_IS_16BIT# if defined MATCH_MAY_ALLOCATE/* 4400 was enough to cause a crash on Alpha OSF/1, whose default stack limit is 2mb. */long int re_max_failures = 4000;# elselong int re_max_failures = 2000;# endifunion fail_stack_elt{ US_CHAR_TYPE *pointer; long int integer;};typedef union fail_stack_elt fail_stack_elt_t;typedef struct{ fail_stack_elt_t *stack; unsigned long int size; unsigned long int avail; /* Offset of next open position. */} fail_stack_type;#else /* not INT_IS_16BIT */# if defined MATCH_MAY_ALLOCATE/* 4400 was enough to cause a crash on Alpha OSF/1, whose default stack limit is 2mb. */int re_max_failures = 4000;# elseint re_max_failures = 2000;# endifunion fail_stack_elt{ US_CHAR_TYPE *pointer; int integer;};typedef union fail_stack_elt fail_stack_elt_t;typedef struct{ fail_stack_elt_t *stack; unsigned size; unsigned avail; /* Offset of next open position. */} fail_stack_type;#endif /* INT_IS_16BIT */#define FAIL_STACK_EMPTY() (fail_stack.avail == 0)#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)/* Define macros to initialize and free the failure stack. Do `return -2' if the alloc fails. */#ifdef MATCH_MAY_ALLOCATE# define INIT_FAIL_STACK() \ do { \ fail_stack.stack = (fail_stack_elt_t *) \ REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \ \ if (fail_stack.stack == NULL) \ return -2; \ \ fail_stack.size = INIT_FAILURE_ALLOC; \ fail_stack.avail = 0; \ } while (0)# define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)#else# define INIT_FAIL_STACK() \ do { \ fail_stack.avail = 0; \ } while (0)# define RESET_FAIL_STACK()#endif/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. Return 1 if succeeds, and 0 if either ran out of memory allocating space for it or it was already too large. REGEX_REALLOCATE_STACK requires `destination' be declared. */#define DOUBLE_FAIL_STACK(fail_stack) \ ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \ ? 0 \ : ((fail_stack).stack = (fail_stack_elt_t *) \ REGEX_REALLOCATE_STACK ((fail_stack).stack, \ (fail_stack).size * sizeof (fail_stack_elt_t), \ ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \ \ (fail_stack).stack == NULL \ ? 0 \ : ((fail_stack).size <<= 1, \ 1)))/* Push pointer POINTER on FAIL_STACK. Return 1 if was able to do so and 0 if ran out of memory allocating space to do so. */#define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \ ((FAIL_STACK_FULL () \ && !DOUBLE_FAIL_STACK (FAIL_STACK)) \ ? 0 \ : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \ 1))/* Push a pointer value onto the failure stack. Assumes the variable `fail_stack'. Probably should only be called from within `PUSH_FAILURE_POINT'. */#define PUSH_FAILURE_POINTER(item) \ fail_stack.stack[fail_stack.avail++].pointer = (US_CHAR_TYPE *) (item)/* This pushes an integer-valued item onto the failure stack. Assumes the variable `fail_stack'. Probably should only be called from within `PUSH_FAILURE_POINT'. */#define PUSH_FAILURE_INT(item) \ fail_stack.stack[fail_stack.avail++].integer = (item)/* Push a fail_stack_elt_t value onto the failure stack. Assumes the variable `fail_stack'. Probably should only be called from within `PUSH_FAILURE_POINT'. */#define PUSH_FAILURE_ELT(item) \ fail_stack.stack[fail_stack.avail++] = (item)/* These three POP... operations complement the three PUSH... operations. All assume that `fail_stack' is nonempty. */#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]/* Used to omit pushing failure point id's when we're not debugging. */#ifdef DEBUG# define DEBUG_PUSH PUSH_FAILURE_INT# define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()#else# define DEBUG_PUSH(item)# define DEBUG_POP(item_addr)#endif/* Push the information about the state we will need if we ever fail back to it. Requires variables fail_stack, regstart, regend, reg_info, and num_regs_pushed be declared. DOUBLE_FAIL_STACK requires `destination' be declared. Does `return FAILURE_CODE' if runs out of memory. */#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \ do { \ char *destination; \ /* Must be int, so when we don't save any registers, the arithmetic \ of 0 + -1 isn't done as unsigned. */ \ /* Can't be int, since there is not a shred of a guarantee that int \ is wide enough to hold a value of something to which pointer can \ be assigned */ \ active_reg_t this_reg; \ \ DEBUG_STATEMENT (failure_id++); \ DEBUG_STATEMENT (nfailure_points_pushed++); \ DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ \ DEBUG_PRINT2 (" slots needed: %ld\n", NUM_FAILURE_ITEMS); \ DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \ \ /* Ensure we have enough space allocated for what we will push. */ \ while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \ { \ if (!DOUBLE_FAIL_STACK (fail_stack)) \ return failure_code; \ \ DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \ (fail_stack).size); \ DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\ } \ \ /* Push the info, starting with the registers. */ \ DEBUG_PRINT1 ("\n"); \ \ if (1) \ for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \ this_reg++) \ { \ DEBUG_PRINT2 (" Pushing reg: %lu\n", this_reg); \ DEBUG_STATEMENT (num_regs_pushed++); \ \ DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \ PUSH_FAILURE_POINTER (regstart[this_reg]); \ \ DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \ PUSH_FAILURE_POINTER (regend[this_reg]); \ \ DEBUG_PRINT2 (" info: %p\n ", \ reg_info[this_reg].word.pointer); \ DEBUG_PRINT2 (" match_null=%d", \ REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \ DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \ DEBUG_PRINT2 (" matched_something=%d", \ MATCHED_SOMETHING (reg_info[this_reg])); \ DEBUG_PRINT2 (" ever_matched=%d", \ EVER_MATCHED_SOMETHING (reg_info[this_reg])); \ DEBUG_PRINT1 ("\n"); \ PUSH_FAILURE_ELT (reg_info[this_reg].word); \ } \ \ DEBUG_PRINT2 (" Pushing low active reg: %ld\n", lowest_active_reg);\ PUSH_FAILURE_INT (lowest_active_reg); \ \ DEBUG_PRINT2 (" Pushing high active reg: %ld\n", highest_active_reg);\ PUSH_FAILURE_INT (highest_active_reg); \ \ DEBUG_PRINT2 (" Pushing pattern %p:\n", pattern_place); \ DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ PUSH_FAILURE_POINTER (pattern_place); \ \ DEBUG_PRINT2 (" Pushing string %p: `", string_place); \ DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ size2); \ DEBUG_PRINT1 ("'\n"); \ PUSH_FAILURE_POINTER (string_place); \ \ DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \ DEBUG_PUSH (failure_id); \ } while (0)/* This is the number of items that are pushed and popped on the stack for each register. */#define NUM_REG_ITEMS 3/* Individual items aside from the registers. */#ifdef DEBUG# define NUM_NONREG_ITEMS 5 /* Includes failure point id. */#else# define NUM_NONREG_ITEMS 4#endif/* We push at most this many items on the stack. *//* We used to use (num_regs - 1), which is the number of registers this regexp will save; but that was changed to 5 to avoid stack overflow for a regexp with lots of parens. */#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -