⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regex.c

📁 正则表达式匹配源码
💻 C
📖 第 1 页 / 共 5 页
字号:
   {start,stop}_memory, the maximum number of groups we can report   things about is what fits in that byte.  */#define MAX_REGNUM 255/* But patterns can have more than `MAX_REGNUM' registers.  We just   ignore the excess.  */typedef unsigned regnum_t;/* Macros for the compile stack.  *//* Since offsets can go either forwards or backwards, this type needs to   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  *//* int may be not enough when sizeof(int) == 2.  */typedef long pattern_offset_t;typedef struct{  pattern_offset_t begalt_offset;  pattern_offset_t fixup_alt_jump;  pattern_offset_t inner_group_offset;  pattern_offset_t laststart_offset;  regnum_t regnum;} compile_stack_elt_t;typedef struct{  compile_stack_elt_t *stack;  unsigned size;  unsigned avail;			/* Offset of next open position.  */} compile_stack_type;#define INIT_COMPILE_STACK_SIZE 32#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)/* The next available element.  */#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])/* Set the bit for character C in a list.  */#define SET_LIST_BIT(c)                               \  (b[((unsigned char) (c)) / BYTEWIDTH]               \   |= 1 << (((unsigned char) c) % BYTEWIDTH))/* Get the next unsigned number in the uncompiled pattern.  */#define GET_UNSIGNED_NUMBER(num) 					\  { if (p != pend)							\     {									\       PATFETCH (c); 							\       while (ISDIGIT (c)) 						\         { 								\           if (num < 0)							\              num = 0;							\           num = num * 10 + c - '0'; 					\           if (p == pend) 						\              break; 							\           PATFETCH (c);						\         } 								\       } 								\    }#if defined _LIBC || WIDE_CHAR_SUPPORT/* The GNU C library provides support for user-defined character classes   and the functions from ISO C amendement 1.  */# ifdef CHARCLASS_NAME_MAX#  define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX# else/* This shouldn't happen but some implementation might still have this   problem.  Use a reasonable default value.  */#  define CHAR_CLASS_MAX_LENGTH 256# endif# ifdef _LIBC#  define IS_CHAR_CLASS(string) __wctype (string)# else#  define IS_CHAR_CLASS(string) wctype (string)# endif#else# define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */# define IS_CHAR_CLASS(string)						\   (STREQ (string, "alpha") || STREQ (string, "upper")			\    || STREQ (string, "lower") || STREQ (string, "digit")		\    || STREQ (string, "alnum") || STREQ (string, "xdigit")		\    || STREQ (string, "space") || STREQ (string, "print")		\    || STREQ (string, "punct") || STREQ (string, "graph")		\    || STREQ (string, "cntrl") || STREQ (string, "blank"))#endif#ifndef MATCH_MAY_ALLOCATE/* If we cannot allocate large objects within re_match_2_internal,   we make the fail stack and register vectors global.   The fail stack, we grow to the maximum size when a regexp   is compiled.   The register vectors, we adjust in size each time we   compile a regexp, according to the number of registers it needs.  */static fail_stack_type fail_stack;/* Size with which the following vectors are currently allocated.   That is so we can make them bigger as needed,   but never make them smaller.  */static int regs_allocated_size;static const char **     regstart, **     regend;static const char ** old_regstart, ** old_regend;static const char **best_regstart, **best_regend;static register_info_type *reg_info;static const char **reg_dummy;static register_info_type *reg_info_dummy;/* Make the register vectors big enough for NUM_REGS registers,   but don't make them smaller.  */staticregex_grow_registers (num_regs)     int num_regs;{  if (num_regs > regs_allocated_size)    {      RETALLOC_IF (regstart,	 num_regs, const char *);      RETALLOC_IF (regend,	 num_regs, const char *);      RETALLOC_IF (old_regstart, num_regs, const char *);      RETALLOC_IF (old_regend,	 num_regs, const char *);      RETALLOC_IF (best_regstart, num_regs, const char *);      RETALLOC_IF (best_regend,	 num_regs, const char *);      RETALLOC_IF (reg_info,	 num_regs, register_info_type);      RETALLOC_IF (reg_dummy,	 num_regs, const char *);      RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);      regs_allocated_size = num_regs;    }}#endif /* not MATCH_MAY_ALLOCATE */static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type						 compile_stack,						 regnum_t regnum));/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.   Returns one of error codes defined in `regex.h', or zero for success.   Assumes the `allocated' (and perhaps `buffer') and `translate'   fields are set in BUFP on entry.   If it succeeds, results are put in BUFP (if it returns an error, the   contents of BUFP are undefined):     `buffer' is the compiled pattern;     `syntax' is set to SYNTAX;     `used' is set to the length of the compiled pattern;     `fastmap_accurate' is zero;     `re_nsub' is the number of subexpressions in PATTERN;     `not_bol' and `not_eol' are zero;   The `fastmap' and `newline_anchor' fields are neither   examined nor set.  *//* Return, freeing storage we allocated.  */#define FREE_STACK_RETURN(value)		\  return (free (compile_stack.stack), value)static reg_errcode_tregex_compile (pattern, size, syntax, bufp)     const char *pattern;     size_t size;     reg_syntax_t syntax;     struct re_pattern_buffer *bufp;{  /* We fetch characters from PATTERN here.  Even though PATTERN is     `char *' (i.e., signed), we declare these variables as unsigned, so     they can be reliably used as array indices.  */  register unsigned char c, c1;  /* A random temporary spot in PATTERN.  */  const char *p1;  /* Points to the end of the buffer, where we should append.  */  register unsigned char *b;  /* Keeps track of unclosed groups.  */  compile_stack_type compile_stack;  /* Points to the current (ending) position in the pattern.  */  const char *p = pattern;  const char *pend = pattern + size;  /* How to translate the characters in the pattern.  */  RE_TRANSLATE_TYPE translate = bufp->translate;  /* Address of the count-byte of the most recently inserted `exactn'     command.  This makes it possible to tell if a new exact-match     character can be added to that command or if the character requires     a new `exactn' command.  */  unsigned char *pending_exact = 0;  /* Address of start of the most recently finished expression.     This tells, e.g., postfix * where to find the start of its     operand.  Reset at the beginning of groups and alternatives.  */  unsigned char *laststart = 0;  /* Address of beginning of regexp, or inside of last group.  */  unsigned char *begalt;  /* Place in the uncompiled pattern (i.e., the {) to     which to go back if the interval is invalid.  */  const char *beg_interval;  /* Address of the place where a forward jump should go to the end of     the containing expression.  Each alternative of an `or' -- except the     last -- ends with a forward jump of this sort.  */  unsigned char *fixup_alt_jump = 0;  /* Counts open-groups as they are encountered.  Remembered for the     matching close-group on the compile stack, so the same register     number is put in the stop_memory as the start_memory.  */  regnum_t regnum = 0;#ifdef DEBUG  DEBUG_PRINT1 ("\nCompiling pattern: ");  if (debug)    {      unsigned debug_count;      for (debug_count = 0; debug_count < size; debug_count++)        putchar (pattern[debug_count]);      putchar ('\n');    }#endif /* DEBUG */  /* Initialize the compile stack.  */  compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);  if (compile_stack.stack == NULL)    return REG_ESPACE;  compile_stack.size = INIT_COMPILE_STACK_SIZE;  compile_stack.avail = 0;  /* Initialize the pattern buffer.  */  bufp->syntax = syntax;  bufp->fastmap_accurate = 0;  bufp->not_bol = bufp->not_eol = 0;  /* Set `used' to zero, so that if we return an error, the pattern     printer (for debugging) will think there's no pattern.  We reset it     at the end.  */  bufp->used = 0;  /* Always count groups, whether or not bufp->no_sub is set.  */  bufp->re_nsub = 0;#if !defined emacs && !defined SYNTAX_TABLE  /* Initialize the syntax table.  */   init_syntax_once ();#endif  if (bufp->allocated == 0)    {      if (bufp->buffer)	{ /* If zero allocated, but buffer is non-null, try to realloc             enough space.  This loses if buffer's address is bogus, but             that is the user's responsibility.  */          RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);        }      else        { /* Caller did not allocate a buffer.  Do it for them.  */          bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);        }      if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);      bufp->allocated = INIT_BUF_SIZE;    }  begalt = b = bufp->buffer;  /* Loop through the uncompiled pattern until we're at the end.  */  while (p != pend)    {      PATFETCH (c);      switch (c)        {        case '^':          {            if (   /* If at start of pattern, it's an operator.  */                   p == pattern + 1                   /* If context independent, it's an operator.  */                || syntax & RE_CONTEXT_INDEP_ANCHORS                   /* Otherwise, depends on what's come before.  */                || at_begline_loc_p (pattern, p, syntax))              BUF_PUSH (begline);            else              goto normal_char;          }          break;        case '$':          {            if (   /* If at end of pattern, it's an operator.  */                   p == pend                   /* If context independent, it's an operator.  */                || syntax & RE_CONTEXT_INDEP_ANCHORS                   /* Otherwise, depends on what's next.  */                || at_endline_loc_p (p, pend, syntax))               BUF_PUSH (endline);             else               goto normal_char;           }           break;	case '+':        case '?':          if ((syntax & RE_BK_PLUS_QM)              || (syntax & RE_LIMITED_OPS))            goto normal_char;        handle_plus:        case '*':          /* If there is no previous pattern... */          if (!laststart)            {              if (syntax & RE_CONTEXT_INVALID_OPS)                FREE_STACK_RETURN (REG_BADRPT);              else if (!(syntax & RE_CONTEXT_INDEP_OPS))                goto normal_char;            }          {            /* Are we optimizing this jump?  */            boolean keep_string_p = false;            /* 1 means zero (many) matches is allowed.  */            char zero_times_ok = 0, many_times_ok = 0;            /* If there is a sequence of repetition chars, collapse it               down to just one (the right one).  We can't combine               interval operators with these because of, e.g., `a{2}*',               which should only match an even number of `a's.  */            for (;;)              {                zero_times_ok |= c != '+';                many_times_ok |= c != '?';                if (p == pend)                  break;                PATFETCH (c);                if (c == '*'                    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))                  ;                else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')                  {                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);                    PATFETCH (c1);                    if (!(c1 == '+' || c1 == '?'))                      {                        PATUNFETCH;                        PATUNFETCH;                        break;                      }                    c = c1;                  }                else                  {                    PATUNFETCH;                    break;                  }                /* If we get here, we found another repeat character.  */               }            /* Star, etc. applied to an empty pattern is equivalent               to an empty pattern.  */            if (!laststart)              break;            /* Now we know whether or not zero matches is allowed               and also whether or not two or more matches is allowed.  */            if (many_times_ok)              { /* More than one repetition is allowed, so put in at the                   end a backward relative jump from `b' to before the next                   jump we're going to put in below (which jumps from                   laststart to after this jump).                   But if we are at the `*' in the exact sequence `.*\n',                   insert an unconditional jump backwards to the .,                   instead of the beginning of the loop.  This way we only                   push a failure point once, instead of every time                   through the loop.  */                assert (p - 1 > pattern);                /* Allocate the space for the jump.  */                GET_BUFFER_SPACE (3);                /* We know we are not at the first character of the pattern,                   because laststart was nonzero.  And we've already                   incremented `p', by the wa

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -