⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regex.c

📁 HLPDK V10.0+ System Extension Library
💻 C
📖 第 1 页 / 共 5 页
字号:
   jump uses, say, to decide how many times to jump.
   
   If you call this function, you must zero out pending_exact.  */

static void
store_jump_n (from, opcode, to, n)
     char *from, *to;
     char opcode;
     unsigned n;
{
  /* Two numbers follow the opcode byte: the jump displacement, written
     at FROM + 1, and the count N, written at FROM + 3.  The
     displacement is taken relative to FROM + 3.  */
  char *displacement_at = from + 1;
  char *count_at = from + 3;

  *from = opcode;
  STORE_NUMBER (displacement_at, to - count_at);
  STORE_NUMBER (count_at, n);
}


/* Counterpart of insert_jump for jumps that carry an extra number
   (used for the minimum and maximum cases).  Makes room at FROM by
   moving everything in [FROM, CURRENT_END) up by five bytes, then
   stores there a jump with opcode OP, target TO and count N.
   CURRENT_END is the end of the storage currently in use, which tells
   us how much data has to be moved.

   If you call this function, you must zero out pending_exact.  */

static void
insert_jump_n (op, from, to, current_end, n)
     char op;
     char *from, *to, *current_end;
     unsigned n;
{
  char *src = current_end;		/* One past the last byte to move.  */
  char *dst = current_end + 5;		/* One past where that byte lands.  */

  /* Copy from the top down so that the overlapping source and
     destination ranges do not clobber bytes still waiting to move.  */
  for (; src != from; )
    {
      --src;
      --dst;
      *dst = *src;
    }

  store_jump_n (from, op, to, n);
}


/* Make room at THERE by moving everything in [THERE, CURRENT_END) up
   by five bytes, then store operation OP at THERE, NUM_1 at THERE + 1
   and NUM_2 at THERE + 3.  CURRENT_END is the end of the storage
   currently in use, which tells us how much data has to be moved.

   If you call this function, you must zero out pending_exact.  */

static void
insert_op_2 (op, there, current_end, num_1, num_2)
     char op;
     char *there, *current_end;
     int num_1, num_2;
{
  char *src = current_end;		/* One past the last byte to move.  */
  char *dst = current_end + 5;		/* One past where that byte lands.  */
  char *first_number_at = there + 1;
  char *second_number_at = there + 3;

  /* Copy from the top down so that the overlapping source and
     destination ranges do not clobber bytes still waiting to move.  */
  for (; src != there; )
    {
      --src;
      --dst;
      *dst = *src;
    }

  *there = op;
  STORE_NUMBER (first_number_at, num_1);
  STORE_NUMBER (second_number_at, num_2);
}



/* Given a pattern, compute a fastmap from it.  The fastmap records
   which of the (1 << BYTEWIDTH) possible characters can start a string
   that matches the pattern.  This fastmap is used by re_search to skip
   quickly over totally implausible text.

   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
   area as bufp->fastmap.
   The other components of bufp describe the pattern to be used.

   On return bufp->fastmap_accurate is 1.  bufp->can_be_null starts at
   0, becomes 1 when the end of the pattern is reached or a `duplicate'
   opcode is seen, and becomes 2 when an `endline' opcode is seen while
   it is not already 1.

   Alternatives still to be examined are kept on a local stack holding
   at most NFAILURES - 1 entries.  If the pattern needs more than that,
   the function gives up conservatively instead of writing past the
   stack: every fastmap entry is set and can_be_null is set to 1, so no
   starting position is ever rejected on the strength of the map.  */

void
re_compile_fastmap (bufp)
     struct re_pattern_buffer *bufp;
{
  unsigned char *pattern = (unsigned char *) bufp->buffer;
  int size = bufp->used;
  register char *fastmap = bufp->fastmap;
  register unsigned char *p = pattern;
  register unsigned char *pend = pattern + size;
  register int j, k;
  unsigned char *translate = (unsigned char *) bufp->translate;

  /* Stack of pattern positions (alternatives) still to be examined.
     Slot 0 is never written: pushes pre-increment stackp, so
     stackp == stackb means the stack is empty.  */
  unsigned char *stackb[NFAILURES];
  unsigned char **stackp = stackb;

  unsigned is_a_succeed_n;

  bzero (fastmap, (1 << BYTEWIDTH));
  bufp->fastmap_accurate = 1;
  bufp->can_be_null = 0;

  while (p)
    {
      is_a_succeed_n = 0;
      if (p == pend)
        {
          /* Ran off the end of the pattern on this path.  */
          bufp->can_be_null = 1;
          break;
        }
#ifdef SWITCH_ENUM_BUG
      switch ((int) ((enum regexpcode) *p++))
#else
      switch ((enum regexpcode) *p++)
#endif
        {
        case exactn:
          /* p[0] is skipped; p[1] is the character recorded.  */
          if (translate)
            fastmap[translate[p[1]]] = 1;
          else
            fastmap[p[1]] = 1;
          break;

        case begline:
        case before_dot:
        case at_dot:
        case after_dot:
        case begbuf:
        case endbuf:
        case wordbound:
        case notwordbound:
        case wordbeg:
        case wordend:
          /* Nothing recorded for these; go on to the next opcode.  */
          continue;

        case endline:
          if (translate)
            fastmap[translate['\n']] = 1;
          else
            fastmap['\n'] = 1;

          if (bufp->can_be_null != 1)
            bufp->can_be_null = 2;
          break;

        case jump_n:
        case finalize_jump:
        case maybe_finalize_jump:
        case jump:
        case dummy_failure_jump:
          EXTRACT_NUMBER_AND_INCR (j, p);
          p += j;
          if (j > 0)
            continue;
          /* Jump backward reached implies we just went through
             the body of a loop and matched nothing.
             Opcode jumped to should be an on_failure_jump.
             Just treat it like an ordinary jump.
             For a * loop, it has pushed its failure point already;
             If so, discard that as redundant.  */

          if ((enum regexpcode) *p != on_failure_jump
              && (enum regexpcode) *p != succeed_n)
            continue;
          p++;
          EXTRACT_NUMBER_AND_INCR (j, p);
          p += j;
          if (stackp != stackb && *stackp == p)
            stackp--;
          continue;

        case on_failure_jump:
        handle_on_failure_jump:
          EXTRACT_NUMBER_AND_INCR (j, p);
          if (stackp == stackb + NFAILURES - 1)
            {
              /* The stack of pending alternatives is full.  Pushing
                 would write past the end of stackb, so give up and
                 produce a map that rules nothing out.  */
              for (j = 0; j < (1 << BYTEWIDTH); j++)
                fastmap[j] = 1;
              bufp->can_be_null = 1;
              return;
            }
          /* Remember the jump target as an alternative to try later.  */
          *++stackp = p + j;
          if (is_a_succeed_n)
            EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
          continue;

        case succeed_n:
          is_a_succeed_n = 1;
          /* Get to the number of times to succeed.  */
          p += 2;
          /* Increment p past the n for when k != 0.  */
          EXTRACT_NUMBER_AND_INCR (k, p);
          if (k == 0)
            {
              /* Back up over both numbers and handle this exactly
                 like an on_failure_jump.  */
              p -= 4;
              goto handle_on_failure_jump;
            }
          continue;

        case set_number_at:
          p += 4;
          continue;

        case start_memory:
        case stop_memory:
          p++;
          continue;

        case duplicate:
          bufp->can_be_null = 1;
          fastmap['\n'] = 1;
          /* Fall through: every other character is set as well.  */
        case anychar:
          for (j = 0; j < (1 << BYTEWIDTH); j++)
            if (j != '\n')
              fastmap[j] = 1;
          if (bufp->can_be_null)
            return;
          /* Don't return; check the alternative paths
             so we can set can_be_null if appropriate.  */
          break;

        case wordchar:
          for (j = 0; j < (1 << BYTEWIDTH); j++)
            if (SYNTAX (j) == Sword)
              fastmap[j] = 1;
          break;

        case notwordchar:
          for (j = 0; j < (1 << BYTEWIDTH); j++)
            if (SYNTAX (j) != Sword)
              fastmap[j] = 1;
          break;

#ifdef emacs
        case syntaxspec:
          k = *p++;
          for (j = 0; j < (1 << BYTEWIDTH); j++)
            if (SYNTAX (j) == (enum syntaxcode) k)
              fastmap[j] = 1;
          break;

        case notsyntaxspec:
          k = *p++;
          for (j = 0; j < (1 << BYTEWIDTH); j++)
            if (SYNTAX (j) != (enum syntaxcode) k)
              fastmap[j] = 1;
          break;
#endif /* emacs */

        case charset:
          /* *p is the number of bitmap bytes that follow; walk every
             bit of the bitmap and record the characters whose bit is
             set.  */
          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
            if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
              {
                if (translate)
                  fastmap[translate[j]] = 1;
                else
                  fastmap[j] = 1;
              }
          break;

        case charset_not:
          /* Chars beyond end of map must be allowed */
          for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
            if (translate)
              fastmap[translate[j]] = 1;
            else
              fastmap[j] = 1;

          /* Within the bitmap, record the characters whose bit is
             clear.  */
          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
            if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
              {
                if (translate)
                  fastmap[translate[j]] = 1;
                else
                  fastmap[j] = 1;
              }
          break;
        }

      /* Get here means we have successfully found the possible starting
         characters of one path of the pattern.  We need not follow this
         path any farther.  Instead, look at the next alternative
         remembered in the stack.  */
      if (stackp != stackb)
        p = *stackp--;
      else
        break;
    }
}



/* Single-string front end to re_search_2 (below).  The subject STRING
   of length SIZE is handed over as the second string, with a null
   first string of length zero, and SIZE is also passed as the stop
   position, so the caller cannot say where matching should stop.  */

int
re_search (pbufp, string, size, startpos, range, regs)
     struct re_pattern_buffer *pbufp;
     char *string;
     int size, startpos, range;
     struct re_registers *regs;
{
  char *no_first_string = (char *) 0;
  int stop_at = size;

  return re_search_2 (pbufp, no_first_string, 0, string, size,
		      startpos, range, regs, stop_at);
}


/* Using the compiled pattern in PBUFP->buffer, first tries to match the
   virtual concatenation of STRING1 and STRING2, starting first at index
   STARTPOS, then at STARTPOS + 1, and so on.  RANGE is the number of
   places to try before giving up.  If RANGE is negative, it searches
   backwards, i.e., the starting positions tried are STARTPOS, STARTPOS
   - 1, etc.  STRING1 and STRING2 are of SIZE1 and SIZE2, respectively.
   In REGS, return the indices of the virtual concatenation of STRING1
   and STRING2 that matched the entire PBUFP->buffer and its contained
   subexpressions.  Do not consider matching one past the index MSTOP in
   the virtual concatenation of STRING1 and STRING2.

   The value returned is the position in the strings at which the match
   was found, or -1 if no match was found, or -2 if error (such as
   failure stack overflow).  */

int
re_search_2 (pbufp, string1, size1, string2, size2, startpos, range,
	     regs, mstop)
     struct re_pattern_buffer *pbufp;
     char *string1, *string2;
     int size1, size2;
     int startpos;
     register int range;
     struct re_registers *regs;
     int mstop;
{
  register char *fastmap = pbufp->fastmap;
  register unsigned char *translate = (unsigned char *) pbufp->translate;
  int total_size = size1 + size2;
  int endpos = startpos + range;
  int val;

  /* Check for out-of-range starting position.  */
  if (startpos < 0  ||  startpos > total_size)
    return -1;
    
  /* Fix up range if it would eventually take startpos outside of the
     virtual concatenation of string1 and string2.  */
  if (endpos < -1)
    range = -1 - startpos;
  else if (endpos > total_size)
    range = total_size - startpos;

  /* Update the fastmap now if not correct already.  */
  if (fastmap && !pbufp->fastmap_accurate)
    re_compile_fastmap (pbufp);
  
  /* If the search isn't to be a backwards one, don't waste time in a
     long search for a pattern that says it is anchored.  */
  if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf
      && range > 0)
    {
      if (startpos > 0)
	return -1;
      else
	range = 1;
    }

  while (1)
    { 
      /* If a fastmap is supplied, skip quickly over characters that
         cannot possibly be the start of a match.  Note, however, that
         if the pattern can possibly match the null string, we must
         test it at each starting point so that we take the first null
         string we get.  */

      if (fastmap && startpos < total_size && pbufp->can_be_null != 1)
	{
	  if (range > 0)	/* Searching forwards.  */
	    {
	      register int lim = 0;
	      register unsigned char *p;
	      int irange = range;
	      if (startpos < size1 && startpos + range >= size1)
		lim = range - (size1 - startpos);

	      p = ((unsigned char *)
		   &(startpos >= size1 ? string2 - size1 : string1)[startpos]);

              while (range > lim && !fastmap[translate 
                                             ? translate[*p++]
                                             : *p++])
		    range--;
	      startpos += irange - range;
	    }
	  else				/* Searching backwards.  */
	    {
	      register unsigned char c;

              if (string1 == 0 || startpos >= size1)
		c = string2[startpos - size1];
	      else 
		c = string1[startpos];

              c &= 0xff;
	      if (translate ? !fastmap[translate[c]] : !fastmap[c])
		goto advance;
	    }
	}

      if (range >= 0 && startpos == total_size
	  && fastmap && pbufp->can_be_null == 0)
	return -1;

      val = re_match_2 (pbufp, string1, size1, string2, size2, startpos,
			regs, mstop);
      if (val >= 0)
	return startpos;
      if (val == -2)
	return -2;

#ifdef C_ALLOCA
      alloca (0);
#endif /* C_ALLOCA */

    advance:
      if (!range) 

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -