⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 x_regex.c

📁 一套客户/服务器模式的备份系统代码,跨平台,支持linux,AIX, IRIX, FreeBSD, Digital Unix (OSF1), Solaris and HP-UX.
💻 C
📖 第 1 页 / 共 5 页
字号:
/* Store a jump with opcode OP at LOC to location TO.  We store a   relative address offset by the three bytes the jump itself occupies.  */#define STORE_JUMP(op, loc, to) \  store_op1 (op, loc, (UChar *)(to) - (UChar *)(loc) - 3)/* Likewise, for a two-argument jump.  */#define STORE_JUMP2(op, loc, to, arg) \  store_op2 (op, loc, (UChar *)(to) - (UChar *)(loc) - 3, (Int32)arg)/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */#define INSERT_JUMP(op, loc, to) \  insert_op1 (op, (UChar *) (loc), (UChar *)(to) - (UChar *)(loc) - 3, b)/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */#define INSERT_JUMP2(op, loc, to, arg) \  insert_op2 (op, (UChar *) (loc), (UChar *)(to) - (UChar *)(loc) - 3, arg, b)/* This is not an arbitrary limit: the arguments which represent offsets   into the pattern are two bytes long.  So if 2^16 bytes turns out to   be too small, many things would have to change.  */#define MAX_BUF_SIZE (1L << 16)/* Extend the buffer by twice its current size via realloc and   reset the pointers that pointed into the old block to point to the   correct places in the new one.  If extending the buffer results in it   being larger than MAX_BUF_SIZE, then flag memory exhausted.	*/#define EXTEND_BUFFER()							\  do { 									\    UChar *old_buffer = bufp->buffer;				\    if (bufp->allocated == MAX_BUF_SIZE) 				\      return REG_ESIZE;							\    bufp->allocated <<= 1;						\    if (bufp->allocated > MAX_BUF_SIZE)					\      bufp->allocated = MAX_BUF_SIZE; 					\    bufp->buffer = (UChar *) realloc_forced (bufp->buffer, bufp->allocated);\    if (bufp->buffer == NULL)						\      return REG_ESPACE;						\    /* If the buffer moved, move all the pointers into it.  */		\    if (old_buffer != bufp->buffer)					\      {									\	b = (b - old_buffer) + bufp->buffer;				\	begalt = (begalt - old_buffer) + bufp->buffer;			\	if (fixup_alt_jump)						\	  fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\	if (laststart)							\	  laststart = (laststart - old_buffer) + bufp->buffer;		\	if (pending_exact)						\	  pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\      }									\  } while (0)/* Since we have one byte reserved for the register number argument to   {start,stop}_memory, the maximum number of groups we can report   things about is what fits in that byte.  */#define MAX_REGNUM 255/* But patterns can have more than `MAX_REGNUM' registers.  We just   ignore the excess.  */typedef Uns32 regnum_t;/* Macros for the compile stack.  *//* Since offsets can go either forwards or backwards, this type needs to   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */typedef Int32 pattern_offset_t;typedef struct{  pattern_offset_t begalt_offset;  pattern_offset_t fixup_alt_jump;  pattern_offset_t inner_group_offset;  pattern_offset_t laststart_offset;    regnum_t regnum;} compile_stack_elt_t;typedef struct{  compile_stack_elt_t *stack;  Uns32 size;  Uns32 avail;			/* Offset of next open position.  */} compile_stack_type;#define INIT_COMPILE_STACK_SIZE 32#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)/* The next available element.	*/#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])/* Set the bit for character C in a list.  */#define SET_LIST_BIT(c) 			      \  (b[((UChar) (c)) / BYTEWIDTH] 	      \   |= 1 << (((UChar) c) % BYTEWIDTH))/* Get the next unsigned number in the uncompiled pattern.  */#define GET_UNSIGNED_NUMBER(num) 					\  { if (p != pend)							\     {									\       PATFETCH (c); 							\       while (ISDIGIT (c)) 						\	 { 								\	   if (num < 0)							\	      num = 0;							\	   num = num * 10 + c - '0'; 					\	   if (p == pend) 						\	      break; 							\	   PATFETCH (c);						\	 } 								\       } 								\    }		#define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */#define IS_CHAR_CLASS(string)						\   (STREQ (string, "alpha") || STREQ (string, "upper")			\    || STREQ (string, "lower") || STREQ (string, "digit")		\    || STREQ (string, "alnum") || STREQ (string, "xdigit")		\    || STREQ (string, "space") || STREQ (string, "print")		\    || STREQ (string, "punct") || STREQ (string, "graph")		\    || STREQ (string, "cntrl") || STREQ (string, "blank"))/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.   Returns one of error codes defined in `regex.h', or zero for success.   Assumes the `allocated' (and perhaps `buffer') and `translate'   fields are set in BUFP on entry.   If it succeeds, results are put in BUFP (if it returns an error, the   contents of BUFP are undefined):     `buffer' is the compiled pattern;     `syntax' is set to SYNTAX;     `used' is set to the length of the compiled pattern;     `fastmap_accurate' is zero;     `re_nsub' is the number of subexpressions in PATTERN;     `not_bol' and `not_eol' are zero;      The `fastmap' and `newline_anchor' fields are neither   examined nor set.  */static reg_errcode_tregex_compile (pattern, size, syntax, bufp)     UChar *pattern;     Int32 size;     reg_syntax_t syntax;     struct re_pattern_buffer *bufp;{  /* We fetch characters from PATTERN here.  Even though PATTERN is     `char *' (i.e., signed), we declare these variables as unsigned, so     they can be reliably used as array indices.  */  register UChar c, c1;    /* A random tempory spot in PATTERN.	*/  UChar *p1;  /* Points to the end of the buffer, where we should append.  */  register UChar *b;    /* Keeps track of unclosed groups.  */  compile_stack_type compile_stack;  /* Points to the current (ending) position in the pattern.  */  UChar *p = pattern;  UChar *pend = pattern + size;    /* How to translate the characters in the pattern.  */  UChar *translate = bufp->translate;  /* Address of the count-byte of the most recently inserted `exactn'     command.  This makes it possible to tell if a new exact-match     character can be added to that command or if the character requires     a new `exactn' command.  */  UChar *pending_exact = 0;  /* Address of start of the most recently finished expression.     This tells, e.g., postfix * where to find the start of its     operand.  Reset at the beginning of groups and alternatives.  */  UChar *laststart = 0;  /* Address of beginning of regexp, or inside of last group.  */  UChar *begalt;  /* Place in the uncompiled pattern (i.e., the {) to     which to go back if the interval is invalid.  */  UChar *beg_interval;		  /* Address of the place where a forward jump should go to the end of     the containing expression.  Each alternative of an `or' -- except the     last -- ends with a forward jump of this sort.  */  UChar *fixup_alt_jump = 0;  /* Counts open-groups as they are encountered.  Remembered for the     matching close-group on the compile stack, so the same register     number is put in the stop_memory as the start_memory.  */  regnum_t regnum = 0;#ifdef DEBUG  DEBUG_PRINT1 ("\nCompiling pattern: ");  if (debug)    {      Uns32 debug_count;            for (debug_count = 0; debug_count < size; debug_count++)	printchar (pattern[debug_count]);      putchar ('\n');    }#endif /* DEBUG */  /* Initialize the compile stack.  */  compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);  if (compile_stack.stack == NULL)    return REG_ESPACE;  compile_stack.size = INIT_COMPILE_STACK_SIZE;  compile_stack.avail = 0;  /* Initialize the pattern buffer.  */  bufp->syntax = syntax;  bufp->fastmap_accurate = 0;  bufp->not_bol = bufp->not_eol = 0;  /* Set `used' to zero, so that if we return an error, the pattern     printer (for debugging) will think there's no pattern.  We reset it     at the end.  */  bufp->used = 0;    /* Always count groups, whether or not bufp->no_sub is set.  */  bufp->re_nsub = 0;				#if !defined (emacs) && !defined (SYNTAX_TABLE)  /* Initialize the syntax table.  */   init_syntax_once ();#endif  if (bufp->allocated == 0)    {      if (bufp->buffer)	{ /* If zero allocated, but buffer is non-null, try to realloc	     enough space.  This loses if buffer's address is bogus, but	     that is the user's responsibility.  */	  RETALLOC (bufp->buffer, INIT_BUF_SIZE, UChar);	}      else	{ /* Caller did not allocate a buffer.	Do it for them.  */	  bufp->buffer = TALLOC (INIT_BUF_SIZE, UChar);	}      if (!bufp->buffer) return REG_ESPACE;      bufp->allocated = INIT_BUF_SIZE;    }  begalt = b = bufp->buffer;  /* Loop through the uncompiled pattern until we're at the end.  */  while (p != pend)    {      PATFETCH (c);      switch (c)	{	case '^':	  {	    if (   /* If at start of pattern, it's an operator.  */		   p == pattern + 1		   /* If context independent, it's an operator.  */		|| syntax & RE_CONTEXT_INDEP_ANCHORS		   /* Otherwise, depends on what's come before.  */		|| at_begline_loc_p (pattern, p, syntax))	      BUF_PUSH (begline);	    else	      goto normal_char;	  }	  break;	case '$':	  {	    if (   /* If at end of pattern, it's an operator.  */		   p == pend 		   /* If context independent, it's an operator.  */		|| syntax & RE_CONTEXT_INDEP_ANCHORS		   /* Otherwise, depends on what's next.  */		|| at_endline_loc_p (p, pend, syntax))	       BUF_PUSH (endline);	     else	       goto normal_char;	   }	   break;	case '+':	case '?':	  if ((syntax & RE_BK_PLUS_QM)	      || (syntax & RE_LIMITED_OPS))	    goto normal_char;	handle_plus:	case '*':	  /* If there is no previous pattern... */	  if (!laststart)	    {	      if (syntax & RE_CONTEXT_INVALID_OPS)		return REG_BADRPT;	      else if (!(syntax & RE_CONTEXT_INDEP_OPS))		goto normal_char;	    }	  {	    /* Are we optimizing this jump?  */	    boolean keep_string_p = false;	    	    /* 1 means zero (many) matches is allowed.	*/	    UChar zero_times_ok = 0, many_times_ok = 0;	    /* If there is a sequence of repetition chars, collapse it	       down to just one (the right one).  We can't combine	       interval operators with these because of, e.g., `a{2}*',	       which should only match an even number of `a's.	*/	    for (;;)	      {		zero_times_ok |= c != '+';		many_times_ok |= c != '?';		if (p == pend)		  break;		PATFETCH (c);		if (c == '*'		    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))		  ;		else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')		  {		    if (p == pend) return REG_EESCAPE;		    PATFETCH (c1);		    if (!(c1 == '+' || c1 == '?'))		      {			PATUNFETCH;			PATUNFETCH;			break;		      }		    c = c1;		  }		else		  {		    PATUNFETCH;		    break;		  }		/* If we get here, we found another repeat character.  */	       }	    /* Star, etc. applied to an empty pattern is equivalent	       to an empty pattern.  */	    if (!laststart)  	      break;	    /* Now we know whether or not zero matches is allowed	       and also whether or not two or more matches is allowed.	*/	    if (many_times_ok)	      { /* More than one repetition is allowed, so put in at the		   end a backward relative jump from `b' to before the next		   jump we're going to put in below (which jumps from		   laststart to after this jump).  		   But if we are at the `*' in the exact sequence `.*\n',		   insert an unconditional jump backwards to the .,		   instead of the beginning of the loop.  This way we only		   push a failure point once, instead of every time		   through the loop.  */		assert (p - 1 > pattern);		/* Allocate the space for the jump.  */		GET_BUFFER_SPACE (3);		/* We know we are not at the first character of the pattern,		   because laststart was nonzero.  And we've already		   incremented `p', by the way, to be the character after		   the `*'.  Do we have to do something analogous here		   for null bytes, because of RE_DOT_NOT_NULL?	*/		if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')		    && zero_times_ok		    && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')		    && !(syntax & RE_DOT_NEWLINE))		  { /* We have .*\n.  */		    STORE_JUMP (jump, b, laststart);		    keep_string_p = true;		  }		else		  /* Anything else. */		  STORE_JUMP (maybe_pop_jump, b, laststart - 3);		/* We've added more stuff to the buffer.  */		b += 3;	      }	    /* On failure, jump from laststart to b + 3, which will be the	       end of the buffer after this jump is inserted.  */	    GET_BUFFER_SPACE (3);	    INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump				       : on_failure_jump,			 laststart, b + 3);	    pending_exact = 0;	    b += 3;	    if (!zero_times_ok)	      {		/* At least one repetition is required, so insert a		   `dummy_failure_jump' before the initial		   `on_failure_jump' instruction of the loop. This		   effects a skip over that instruction the first time		   we hit that loop.  */		GET_BUFFER_SPACE (3);		INSERT_JUMP (dummy_failure_jump, laststart,					(laststart + 6));		b += 3;	      }	    }	  break;	case '.':	  laststart = b;	  BUF_PUSH (anychar);	  break;	case '[':	  {	    boolean had_char_class = false;	    if (p == pend) return REG_EBRACK;	    /* Ensure that we have enough space to push a charset: the	       opcode, the length count, and the bitset; 34 bytes in all.  */	    GET_BUFFER_SPACE (34);	    laststart = b;	    /* We test `*p == '^' twice, instead of using an if	       statement, so we only need one BUF_PUSH.  */	    BUF_PUSH (*p == '^' ? charset_not : charset); 	    if (*p == '^')	      p++;	    /* Remember the first position in the bracket expression.  */	    p1 = p;	    /* Push the number of bytes in the bitmap.	*/	    BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);	    /* Clear the whole map.  */	    bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);	    /* charset_not matches newline according to a syntax bit.  */	    if ((re_opcode_t) b[-2] == charset_not		&& (syntax & RE_HAT_LISTS_NOT_NEWLINE))	      SET_LIST_BIT ('\n');	    /* Read in characters and ranges, setting map bits.  */	    for (;;)	      {		if (p == pend) return REG_EBRACK;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -