⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regex.c

📁 正则表达式库
💻 C
📖 第 1 页 / 共 5 页
字号:
reg_syntax_tre_set_syntax (syntax)    reg_syntax_t syntax;{  reg_syntax_t ret = re_syntax_options;    re_syntax_options = syntax;  return ret;}/* This table gives an error message for each of the error codes listed   in regex.h.  Obviously the order here has to be same as there.  */static const char *re_error_msg[] =  { NULL,					/* REG_NOERROR */    "No match",					/* REG_NOMATCH */    "Invalid regular expression",		/* REG_BADPAT */    "Invalid collation character",		/* REG_ECOLLATE */    "Invalid character class name",		/* REG_ECTYPE */    "Trailing backslash",			/* REG_EESCAPE */    "Invalid back reference",			/* REG_ESUBREG */    "Unmatched [ or [^",			/* REG_EBRACK */    "Unmatched ( or \\(",			/* REG_EPAREN */    "Unmatched \\{",				/* REG_EBRACE */    "Invalid content of \\{\\}",		/* REG_BADBR */    "Invalid range end",			/* REG_ERANGE */    "Memory exhausted",				/* REG_ESPACE */    "Invalid preceding regular expression",	/* REG_BADRPT */    "Premature end of regular expression",	/* REG_EEND */    "Regular expression too big",		/* REG_ESIZE */    "Unmatched ) or \\)",			/* REG_ERPAREN */  };/* Subroutine declarations and macros for regex_compile.  */static void store_op1 (), store_op2 ();static void insert_op1 (), insert_op2 ();static boolean at_begline_loc_p (), at_endline_loc_p ();static boolean group_in_compile_stack ();static reg_errcode_t compile_range ();/* Fetch the next character in the uncompiled pattern---translating it    if necessary.  Also cast from a signed character in the constant   string passed to us by the user to an unsigned char that we can use   as an array index (in, e.g., `translate').  */#define PATFETCH(c)							\  do {if (p == pend) return REG_EEND;					\    c = (unsigned char) *p++;						\    if (translate) c = translate[c]; 					\  } while (0)/* Fetch the next character in the uncompiled pattern, with no   translation.  */#define PATFETCH_RAW(c)							\  do {if (p == pend) return REG_EEND;					\    c = (unsigned char) *p++; 						\  } while (0)/* Go backwards one character in the pattern.  */#define PATUNFETCH p--/* If `translate' is non-null, return translate[D], else just D.  We   cast the subscript to translate because some data is declared as   `char *', to avoid warnings when a string constant is passed.  But   when we use a character as a subscript we must make it unsigned.  */#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))/* Macros for outputting the compiled pattern into `buffer'.  *//* If the buffer isn't allocated when it comes in, use this.  */#define INIT_BUF_SIZE  32/* Make sure we have at least N more bytes of space in buffer.  */#define GET_BUFFER_SPACE(n)						\    while (b - bufp->buffer + (n) > bufp->allocated)			\      EXTEND_BUFFER ()/* Make sure we have one more byte of buffer space and then add C to it.  */#define BUF_PUSH(c)							\  do {									\    GET_BUFFER_SPACE (1);						\    *b++ = (unsigned char) (c);						\  } while (0)/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */#define BUF_PUSH_2(c1, c2)						\  do {									\    GET_BUFFER_SPACE (2);						\    *b++ = (unsigned char) (c1);					\    *b++ = (unsigned char) (c2);					\  } while (0)/* As with BUF_PUSH_2, except for three bytes.  */#define BUF_PUSH_3(c1, c2, c3)						\  do {									\    GET_BUFFER_SPACE (3);						\    *b++ = (unsigned char) (c1);					\    *b++ = (unsigned char) (c2);					\    *b++ = (unsigned char) (c3);					\  } while (0)/* Store a jump with opcode OP at LOC to location TO.  We store a   relative address offset by the three bytes the jump itself occupies.  */#define STORE_JUMP(op, loc, to) \  store_op1 (op, loc, (to) - (loc) - 3)/* Likewise, for a two-argument jump.  */#define STORE_JUMP2(op, loc, to, arg) \  store_op2 (op, loc, (to) - (loc) - 3, arg)/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */#define INSERT_JUMP(op, loc, to) \  insert_op1 (op, loc, (to) - (loc) - 3, b)/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */#define INSERT_JUMP2(op, loc, to, arg) \  insert_op2 (op, loc, (to) - (loc) - 3, arg, b)/* This is not an arbitrary limit: the arguments which represent offsets   into the pattern are two bytes long.  So if 2^16 bytes turns out to   be too small, many things would have to change.  */#define MAX_BUF_SIZE (1L << 16)/* Extend the buffer by twice its current size via realloc and   reset the pointers that pointed into the old block to point to the   correct places in the new one.  If extending the buffer results in it   being larger than MAX_BUF_SIZE, then flag memory exhausted.  */#define EXTEND_BUFFER()							\  do { 									\    unsigned char *old_buffer = bufp->buffer;				\    if (bufp->allocated == MAX_BUF_SIZE) 				\      return REG_ESIZE;							\    bufp->allocated <<= 1;						\    if (bufp->allocated > MAX_BUF_SIZE)					\      bufp->allocated = MAX_BUF_SIZE; 					\    bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\    if (bufp->buffer == NULL)						\      return REG_ESPACE;						\    /* If the buffer moved, move all the pointers into it.  */		\    if (old_buffer != bufp->buffer)					\      {									\        b = (b - old_buffer) + bufp->buffer;				\        begalt = (begalt - old_buffer) + bufp->buffer;			\        if (fixup_alt_jump)						\          fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\        if (laststart)							\          laststart = (laststart - old_buffer) + bufp->buffer;		\        if (pending_exact)						\          pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\      }									\  } while (0)/* Since we have one byte reserved for the register number argument to   {start,stop}_memory, the maximum number of groups we can report   things about is what fits in that byte.  */#define MAX_REGNUM 255/* But patterns can have more than `MAX_REGNUM' registers.  We just   ignore the excess.  */typedef unsigned regnum_t;/* Macros for the compile stack.  *//* Since offsets can go either forwards or backwards, this type needs to   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */typedef int pattern_offset_t;typedef struct{  pattern_offset_t begalt_offset;  pattern_offset_t fixup_alt_jump;  pattern_offset_t inner_group_offset;  pattern_offset_t laststart_offset;    regnum_t regnum;} compile_stack_elt_t;typedef struct{  compile_stack_elt_t *stack;  unsigned size;  unsigned avail;			/* Offset of next open position.  */} compile_stack_type;#define INIT_COMPILE_STACK_SIZE 32#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)/* The next available element.  */#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])/* Set the bit for character C in a list.  */#define SET_LIST_BIT(c)                               \  (b[((unsigned char) (c)) / BYTEWIDTH]               \   |= 1 << (((unsigned char) c) % BYTEWIDTH))/* Get the next unsigned number in the uncompiled pattern.  */#define GET_UNSIGNED_NUMBER(num) 					\  { if (p != pend)							\     {									\       PATFETCH (c); 							\       while (ISDIGIT (c)) 						\         { 								\           if (num < 0)							\              num = 0;							\           num = num * 10 + c - '0'; 					\           if (p == pend) 						\              break; 							\           PATFETCH (c);						\         } 								\       } 								\    }		#define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */#define IS_CHAR_CLASS(string)						\   (STREQ (string, "alpha") || STREQ (string, "upper")			\    || STREQ (string, "lower") || STREQ (string, "digit")		\    || STREQ (string, "alnum") || STREQ (string, "xdigit")		\    || STREQ (string, "space") || STREQ (string, "print")		\    || STREQ (string, "punct") || STREQ (string, "graph")		\    || STREQ (string, "cntrl") || STREQ (string, "blank"))/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.   Returns one of error codes defined in `regex.h', or zero for success.   Assumes the `allocated' (and perhaps `buffer') and `translate'   fields are set in BUFP on entry.   If it succeeds, results are put in BUFP (if it returns an error, the   contents of BUFP are undefined):     `buffer' is the compiled pattern;     `syntax' is set to SYNTAX;     `used' is set to the length of the compiled pattern;     `fastmap_accurate' is zero;     `re_nsub' is the number of subexpressions in PATTERN;     `not_bol' and `not_eol' are zero;      The `fastmap' and `newline_anchor' fields are neither   examined nor set.  */static reg_errcode_tregex_compile (pattern, size, syntax, bufp)     const char *pattern;     int size;     reg_syntax_t syntax;     struct re_pattern_buffer *bufp;{  /* We fetch characters from PATTERN here.  Even though PATTERN is     `char *' (i.e., signed), we declare these variables as unsigned, so     they can be reliably used as array indices.  */  register unsigned char c, c1;    /* A random tempory spot in PATTERN.  */  const char *p1;  /* Points to the end of the buffer, where we should append.  */  register unsigned char *b;    /* Keeps track of unclosed groups.  */  compile_stack_type compile_stack;  /* Points to the current (ending) position in the pattern.  */  const char *p = pattern;  const char *pend = pattern + size;    /* How to translate the characters in the pattern.  */  char *translate = bufp->translate;  /* Address of the count-byte of the most recently inserted `exactn'     command.  This makes it possible to tell if a new exact-match     character can be added to that command or if the character requires     a new `exactn' command.  */  unsigned char *pending_exact = 0;  /* Address of start of the most recently finished expression.     This tells, e.g., postfix * where to find the start of its     operand.  Reset at the beginning of groups and alternatives.  */  unsigned char *laststart = 0;  /* Address of beginning of regexp, or inside of last group.  */  unsigned char *begalt;  /* Place in the uncompiled pattern (i.e., the {) to     which to go back if the interval is invalid.  */  const char *beg_interval;                  /* Address of the place where a forward jump should go to the end of     the containing expression.  Each alternative of an `or' -- except the     last -- ends with a forward jump of this sort.  */  unsigned char *fixup_alt_jump = 0;  /* Counts open-groups as they are encountered.  Remembered for the     matching close-group on the compile stack, so the same register     number is put in the stop_memory as the start_memory.  */  regnum_t regnum = 0;#ifdef DEBUG  DEBUG_PRINT1 ("\nCompiling pattern: ");  if (debug)    {      unsigned debug_count;            for (debug_count = 0; debug_count < size; debug_count++)        printchar (pattern[debug_count]);      putchar ('\n');    }#endif /* DEBUG */  /* Initialize the compile stack.  */  compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);  if (compile_stack.stack == NULL)    return REG_ESPACE;  compile_stack.size = INIT_COMPILE_STACK_SIZE;  compile_stack.avail = 0;  /* Initialize the pattern buffer.  */  bufp->syntax = syntax;  bufp->fastmap_accurate = 0;  bufp->not_bol = bufp->not_eol = 0;  /* Set `used' to zero, so that if we return an error, the pattern     printer (for debugging) will think there's no pattern.  We reset it     at the end.  */  bufp->used = 0;    /* Always count groups, whether or not bufp->no_sub is set.  */  bufp->re_nsub = 0;				#if !defined (emacs) && !defined (SYNTAX_TABLE)  /* Initialize the syntax table.  */   init_syntax_once ();#endif  if (bufp->allocated == 0)    {      if (bufp->buffer)	{ /* If zero allocated, but buffer is non-null, try to realloc             enough space.  This loses if buffer's address is bogus, but             that is the user's responsibility.  */          RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);        }      else        { /* Caller did not allocate a buffer.  Do it for them.  */          bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);        }      if (!bufp->buffer) return REG_ESPACE;      bufp->allocated = INIT_BUF_SIZE;    }  begalt = b = bufp->buffer;  /* Loop through the uncompiled pattern until we're at the end.  */  while (p != pend)    {      PATFETCH (c);      switch (c)        {        case '^':          {            if (   /* If at start of pattern, it's an operator.  */                   p == pattern + 1                   /* If context independent, it's an operator.  */                || syntax & RE_CONTEXT_INDEP_ANCHORS                   /* Otherwise, depends on what's come before.  */                || at_begline_loc_p (pattern, p, syntax))              BUF_PUSH (begline);            else              goto normal_char;          }          break;        case '$':          {            if (   /* If at end of pattern, it's an operator.  */                   p == pend                    /* If context independent, it's an operator.  */                || syntax & RE_CONTEXT_INDEP_ANCHORS                   /* Otherwise, depends on what's next.  */                || at_endline_loc_p (p, pend, syntax))               BUF_PUSH (endline);             else               goto normal_char;           }           break;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -