⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regex.cc

📁 早期freebsd实现
💻 CC
📖 第 1 页 / 共 5 页
字号:
  while (pfrom != from)			           *--pto = *--pfrom;  store_jump_n (from, op, to, n);}/* Open up space at location THERE, and insert operation OP followed by   NUM_1 and NUM_2.  CURRENT_END gives the end of the storage in use, so   we know how much data to copy up.   If you call this function, you must zero out pending_exact.  */static voidinsert_op_2 (char op, char *there, char *current_end, int num_1, int num_2){  register char *pfrom = current_end;		/* Copy from here...  */  register char *pto = current_end + 5;		/* ...to here.  */  while (pfrom != there)			           *--pto = *--pfrom;    there[0] = op;  STORE_NUMBER (there + 1, num_1);  STORE_NUMBER (there + 3, num_2);}/* Given a pattern, compute a fastmap from it.  The fastmap records   which of the (1 << BYTEWIDTH) possible characters can start a string   that matches the pattern.  This fastmap is used by re_search to skip   quickly over totally implausible text.   The caller must supply the address of a (1 << BYTEWIDTH)-byte data    area as bufp->fastmap.   The other components of bufp describe the pattern to be used.  */voidre_compile_fastmap (struct re_pattern_buffer *bufp){  unsigned char *pattern = (unsigned char *) bufp->buffer;  int size = bufp->used;  register char *fastmap = bufp->fastmap;  register unsigned char *p = pattern;  register unsigned char *pend = pattern + size;  register int j, k;  unsigned char *translate = (unsigned char *) bufp->translate;  unsigned char *stackb[NFAILURES];  unsigned char **stackp = stackb;  unsigned is_a_succeed_n;  memset (fastmap, 0, (1 << BYTEWIDTH));  bufp->fastmap_accurate = 1;  bufp->can_be_null = 0;        while (p)    {      is_a_succeed_n = 0;      if (p == pend)	{	  bufp->can_be_null = 1;	  break;	}#ifdef SWITCH_ENUM_BUG      switch ((int) ((enum regexpcode) *p++))#else      switch ((enum regexpcode) *p++)#endif	{	case exactn:	  if (translate)	    fastmap[translate[p[1]]] = 1;	  else	    fastmap[p[1]] = 1;	  break;        case unused:        case begline:#ifdef emacs        case before_dot:	case at_dot:	case after_dot:#endif	case begbuf:	case endbuf:	case wordbound:	case notwordbound:	case wordbeg:	case wordend:          continue;	case endline:	  if (translate)	    fastmap[translate['\n']] = 1;	  else	    fastmap['\n'] = 1;            	  if (bufp->can_be_null != 1)	    bufp->can_be_null = 2;	  break;	case jump_n:        case finalize_jump:	case maybe_finalize_jump:	case jump:	case dummy_failure_jump:          EXTRACT_NUMBER_AND_INCR (j, p);	  p += j;		  if (j > 0)	    continue;          /* Jump backward reached implies we just went through	     the body of a loop and matched nothing.	     Opcode jumped to should be an on_failure_jump.	     Just treat it like an ordinary jump.	     For a * loop, it has pushed its failure point already;	     If so, discard that as redundant.  */          if ((enum regexpcode) *p != on_failure_jump	      && (enum regexpcode) *p != succeed_n)	    continue;          p++;          EXTRACT_NUMBER_AND_INCR (j, p);          p += j;		          if (stackp != stackb && *stackp == p)            stackp--;          continue;	          case on_failure_jump:	handle_on_failure_jump:          EXTRACT_NUMBER_AND_INCR (j, p);          *++stackp = p + j;	  if (is_a_succeed_n)            EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */	  continue;	case succeed_n:	  is_a_succeed_n = 1;          /* Get to the number of times to succeed.  */          p += 2;			  /* Increment p past the n for when k != 0.  */          EXTRACT_NUMBER_AND_INCR (k, p);          if (k == 0)	    {              p -= 4;              goto handle_on_failure_jump;            }          continue;          	case set_number_at:          p += 4;          continue;        case start_memory:	case stop_memory:	  p++;	  continue;	case duplicate:	  bufp->can_be_null = 1;	  fastmap['\n'] = 1;	case anychar:	  for (j = 0; j < (1 << BYTEWIDTH); j++)	    if (j != '\n')	      fastmap[j] = 1;	  if (bufp->can_be_null)	    return;	  /* Don't return; check the alternative paths	     so we can set can_be_null if appropriate.  */	  break;	case wordchar:	  for (j = 0; j < (1 << BYTEWIDTH); j++)	    if (SYNTAX (j) == Sword)	      fastmap[j] = 1;	  break;	case notwordchar:	  for (j = 0; j < (1 << BYTEWIDTH); j++)	    if (SYNTAX (j) != Sword)	      fastmap[j] = 1;	  break;#ifdef emacs	case syntaxspec:	  k = *p++;	  for (j = 0; j < (1 << BYTEWIDTH); j++)	    if (SYNTAX (j) == (enum syntaxcode) k)	      fastmap[j] = 1;	  break;	case notsyntaxspec:	  k = *p++;	  for (j = 0; j < (1 << BYTEWIDTH); j++)	    if (SYNTAX (j) != (enum syntaxcode) k)	      fastmap[j] = 1;	  break;#endif /* not emacs */	case charset:	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))	      {		if (translate)		  fastmap[translate[j]] = 1;		else		  fastmap[j] = 1;	      }	  break;	case charset_not:	  /* Chars beyond end of map must be allowed */	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)	    if (translate)	      fastmap[translate[j]] = 1;	    else	      fastmap[j] = 1;	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))	      {		if (translate)		  fastmap[translate[j]] = 1;		else		  fastmap[j] = 1;	      }	  break;	}      /* Get here means we have successfully found the possible starting         characters of one path of the pattern.  We need not follow this         path any farther.  Instead, look at the next alternative         remembered in the stack.  */   if (stackp != stackb)	p = *stackp--;      else	break;    }}/* Like re_search_2, below, but only one string is specified, and   doesn't let you say where to stop matching. */intre_search (struct re_pattern_buffer *pbufp,	   char *string,	   int size,	   int startpos,	   int range,	   struct re_registers *regs){  return re_search_2 (pbufp, (char *) 0, 0, string, size, startpos, range, 		      regs, size);}/* Using the compiled pattern in PBUFP->buffer, first tries to match the   virtual concatenation of STRING1 and STRING2, starting first at index   STARTPOS, then at STARTPOS + 1, and so on.  RANGE is the number of   places to try before giving up.  If RANGE is negative, it searches   backwards, i.e., the starting positions tried are STARTPOS, STARTPOS   - 1, etc.  STRING1 and STRING2 are of SIZE1 and SIZE2, respectively.   In REGS, return the indices of the virtual concatenation of STRING1   and STRING2 that matched the entire PBUFP->buffer and its contained   subexpressions.  Do not consider matching one past the index MSTOP in   the virtual concatenation of STRING1 and STRING2.   The value returned is the position in the strings at which the match   was found, or -1 if no match was found, or -2 if error (such as   failure stack overflow).  */intre_search_2 (struct re_pattern_buffer *pbufp,	     char *string1, int size1,	     char *string2, int size2,	     int startpos,	     register int range,	     struct re_registers *regs,	     int mstop){  register char *fastmap = pbufp->fastmap;  register unsigned char *translate = (unsigned char *) pbufp->translate;  int total_size = size1 + size2;  int endpos = startpos + range;  int val;  /* Check for out-of-range starting position.  */  if (startpos < 0  ||  startpos > total_size)    return -1;      /* Fix up range if it would eventually take startpos outside of the     virtual concatenation of string1 and string2.  */  if (endpos < -1)    range = -1 - startpos;  else if (endpos > total_size)    range = total_size - startpos;  /* Update the fastmap now if not correct already.  */  if (fastmap && !pbufp->fastmap_accurate)    re_compile_fastmap (pbufp);    /* If the search isn't to be a backwards one, don't waste time in a     long search for a pattern that says it is anchored.  */  if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf      && range > 0)    {      if (startpos > 0)	return -1;      else	range = 1;    }  while (1)    {       /* If a fastmap is supplied, skip quickly over characters that         cannot possibly be the start of a match.  Note, however, that         if the pattern can possibly match the null string, we must         test it at each starting point so that we take the first null         string we get.  */      if (fastmap && startpos < total_size && pbufp->can_be_null != 1)	{	  if (range > 0)	/* Searching forwards.  */	    {	      register int lim = 0;	      register unsigned char *p;	      int irange = range;	      if (startpos < size1 && startpos + range >= size1)		lim = range - (size1 - startpos);	      p = ((unsigned char *)		   &(startpos >= size1 ? string2 - size1 : string1)[startpos]);              while (range > lim && !fastmap[translate                                              ? translate[*p++]                                             : *p++])		    range--;	      startpos += irange - range;	    }	  else				/* Searching backwards.  */	    {	      register unsigned char c;              if (string1 == 0 || startpos >= size1)		c = string2[startpos - size1];	      else 		c = string1[startpos];              c &= 0xff;	      if (translate ? !fastmap[translate[c]] : !fastmap[c])		goto advance;	    }	}      if (range >= 0 && startpos == total_size	  && fastmap && pbufp->can_be_null == 0)	return -1;      val = re_match_2 (pbufp, string1, size1, string2, size2, startpos,			regs, mstop);      if (val >= 0)	return startpos;      if (val == -2)	return -2;#ifdef C_ALLOCA      alloca (0);#endif /* C_ALLOCA */    advance:      if (!range)         break;      else if (range > 0)         {          range--;           startpos++;        }      else        {          range++;           startpos--;        }    }  return -1;}#ifndef emacs   /* emacs never uses this.  */intre_match (struct re_pattern_buffer *pbufp,	  char *string,	  int size,	  int pos,	  struct re_registers *regs){  return re_match_2 (pbufp, (char *) 0, 0, string, size, pos, regs, size); }#endif /* not emacs *//* The following are used for re_match_2, defined below:  *//* Roughly the maximum number of failure points on the stack.  Would be   exactly that if always pushed MAX_NUM_FAILURE_ITEMS each time we failed.  */   int re_max_failures = 2000;/* Routine used by re_match_2.  */static int bcmp_translate (char *, char *, int, unsigned char *);/* Structure and accessing macros used in re_match_2:  */struct register_info

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -