⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regex.c

📁 TURBOC 文本处理常规库
💻 C
📖 第 1 页 / 共 4 页
字号:
	      else
		{
		  b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH);
		}
	    }
	  /* Discard any bitmap bytes that are all 0 at the end of the map.
	     Decrement the map-length byte too. */
	  while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
	    b[-1]--;
	  b += b[-1];
	  break;

	case '(':
	  if (! (obscure_syntax & RE_NO_BK_PARENS))
	    goto normal_char;
	  else
	    goto handle_open;

	case ')':
	  if (! (obscure_syntax & RE_NO_BK_PARENS))
	    goto normal_char;
	  else
	    goto handle_close;

	case '\n':
	  if (! (obscure_syntax & RE_NEWLINE_OR))
	    goto normal_char;
	  else
	    goto handle_bar;

	case '|':
	  if (! (obscure_syntax & RE_NO_BK_VBAR))
	    goto normal_char;
	  else
	    goto handle_bar;

        case '\\':
	  if (p == pend) goto invalid_pattern;
	  PATFETCH_RAW (c);
	  switch (c)
	    {
	    case '(':
	      if (obscure_syntax & RE_NO_BK_PARENS)
		goto normal_backsl;
	    handle_open:
	      if (stackp == stacke) goto nesting_too_deep;
	      if (regnum < RE_NREGS)
	        {
		  PATPUSH (start_memory);
		  PATPUSH (regnum);
	        }
	      *stackp++ = b - bufp->buffer;
	      *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
	      *stackp++ = regnum++;
	      *stackp++ = begalt - bufp->buffer;
	      fixup_jump = 0;
	      laststart = 0;
	      begalt = b;
	      break;

	    case ')':
	      if (obscure_syntax & RE_NO_BK_PARENS)
		goto normal_backsl;
	    handle_close:
	      if (stackp == stackb) goto unmatched_close;
	      begalt = *--stackp + bufp->buffer;
	      if (fixup_jump)
		store_jump (fixup_jump, jump, b);
	      if (stackp[-1] < RE_NREGS)
		{
		  PATPUSH (stop_memory);
		  PATPUSH (stackp[-1]);
		}
	      stackp -= 2;
	      fixup_jump = 0;
	      if (*stackp)
		fixup_jump = *stackp + bufp->buffer - 1;
	      laststart = *--stackp + bufp->buffer;
	      break;

	    case '|':
	      if (obscure_syntax & RE_NO_BK_VBAR)
		goto normal_backsl;
	    handle_bar:
	      insert_jump (on_failure_jump, begalt, b + 6, b);
	      pending_exact = 0;
	      b += 3;
	      if (fixup_jump)
		store_jump (fixup_jump, jump, b);
	      fixup_jump = b;
	      b += 3;
	      laststart = 0;
	      begalt = b;
	      break;

#ifdef emacs
	    case '=':
	      PATPUSH (at_dot);
	      break;

	    case 's':	
	      laststart = b;
	      PATPUSH (syntaxspec);
	      PATFETCH (c);
	      PATPUSH (syntax_spec_code[c]);
	      break;

	    case 'S':
	      laststart = b;
	      PATPUSH (notsyntaxspec);
	      PATFETCH (c);
	      PATPUSH (syntax_spec_code[c]);
	      break;
#endif /* emacs */

	    case 'w':
	      laststart = b;
	      PATPUSH (wordchar);
	      break;

	    case 'W':
	      laststart = b;
	      PATPUSH (notwordchar);
	      break;

	    case '<':
	      PATPUSH (wordbeg);
	      break;

	    case '>':
	      PATPUSH (wordend);
	      break;

	    case 'b':
	      PATPUSH (wordbound);
	      break;

	    case 'B':
	      PATPUSH (notwordbound);
	      break;

	    case '`':
	      PATPUSH (begbuf);
	      break;

	    case '\'':
	      PATPUSH (endbuf);
	      break;

	    case '1':
	    case '2':
	    case '3':
	    case '4':
	    case '5':
	    case '6':
	    case '7':
	    case '8':
	    case '9':
	      c1 = c - '0';
	      if (c1 >= regnum)
		goto normal_char;
	      for (stackt = stackp - 2;  stackt > stackb;  stackt -= 4)
 		if (*stackt == c1)
		  goto normal_char;
	      laststart = b;
	      PATPUSH (duplicate);
	      PATPUSH (c1);
	      break;

	    case '+':
	    case '?':
	      if (obscure_syntax & RE_BK_PLUS_QM)
		goto handle_plus;

	    default:
	    normal_backsl:
	      /* You might think it would be useful for \ to mean
		 not to translate; but if we don't translate it
		 it will never match anything.  */
	      if (translate) c = translate[c];
	      goto normal_char;
	    }
	  break;

	default:
	normal_char:
	  if (!pending_exact || pending_exact + *pending_exact + 1 != b
	      || *pending_exact == 0177 || *p == '*' || *p == '^'
	      || ((obscure_syntax & RE_BK_PLUS_QM)
		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
		  : (*p == '+' || *p == '?')))
	    {
	      laststart = b;
	      PATPUSH (exactn);
	      pending_exact = b;
	      PATPUSH (0);
	    }
	  PATPUSH (c);
	  (*pending_exact)++;
	}
    }

  if (fixup_jump)
    store_jump (fixup_jump, jump, b);

  if (stackp != stackb) goto unmatched_open;

  bufp->used = b - bufp->buffer;
  return 0;

 invalid_pattern:
  return "Invalid regular expression";

 unmatched_open:
  return "Unmatched \\(";

 unmatched_close:
  return "Unmatched \\)";

 end_of_pattern:
  return "Premature end of regular expression";

 nesting_too_deep:
  return "Nesting too deep";

 too_big:
  return "Regular expression too big";

 memory_exhausted:
  return "Memory exhausted";
}

/* Store where `from' points a jump operation to jump to where `to' points.
  `opcode' is the opcode to store. */

static int
store_jump (from, opcode, to)
     char *from, *to;
     char opcode;
{
  from[0] = opcode;
  from[1] = (to - (from + 3)) & 0377;
  from[2] = (to - (from + 3)) >> 8;
}

/* Open up space at char FROM, and insert there a jump to TO.
   CURRENT_END gives te end of the storage no in use,
   so we know how much data to copy up.
   OP is the opcode of the jump to insert.

   If you call this function, you must zero out pending_exact.  */

static int
insert_jump (op, from, to, current_end)
     char op;
     char *from, *to, *current_end;
{
  register char *pto = current_end + 3;
  register char *pfrom = current_end;
  while (pfrom != from)
    *--pto = *--pfrom;
  store_jump (from, op, to);
}

/* Given a pattern, compute a fastmap from it.
 The fastmap records which of the (1 << BYTEWIDTH) possible characters
 can start a string that matches the pattern.
 This fastmap is used by re_search to skip quickly over totally implausible text.

 The caller must supply the address of a (1 << BYTEWIDTH)-byte data area
 as bufp->fastmap.
 The other components of bufp describe the pattern to be used.  */

void
re_compile_fastmap (bufp)
     struct re_pattern_buffer *bufp;
{
  unsigned char *pattern = (unsigned char *) bufp->buffer;
  int size = bufp->used;
  register char *fastmap = bufp->fastmap;
  register unsigned char *p = pattern;
  register unsigned char *pend = pattern + size;
  register int j, k;
  unsigned char *translate = (unsigned char *) bufp->translate;

  unsigned char *stackb[NFAILURES];
  unsigned char **stackp = stackb;

  bzero (fastmap, (1 << BYTEWIDTH));
  bufp->fastmap_accurate = 1;
  bufp->can_be_null = 0;
      
  while (p)
    {
      if (p == pend)
	{
	  bufp->can_be_null = 1;
	  break;
	}
#ifdef SWITCH_ENUM_BUG
      switch ((int) ((enum regexpcode) *p++))
#else
      switch ((enum regexpcode) *p++)
#endif
	{
	case exactn:
	  if (translate)
	    fastmap[translate[p[1]]] = 1;
	  else
	    fastmap[p[1]] = 1;
	  break;

        case begline:
        case before_dot:
	case at_dot:
	case after_dot:
	case begbuf:
	case endbuf:
	case wordbound:
	case notwordbound:
	case wordbeg:
	case wordend:
	  continue;

	case endline:
	  if (translate)
	    fastmap[translate['\n']] = 1;
	  else
	    fastmap['\n'] = 1;
	  if (bufp->can_be_null != 1)
	    bufp->can_be_null = 2;
	  break;

	case finalize_jump:
	case maybe_finalize_jump:
	case jump:
	case dummy_failure_jump:
	  bufp->can_be_null = 1;
	  j = *p++ & 0377;
	  j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
	  p += j + 1;		/* The 1 compensates for missing ++ above */
	  if (j > 0)
	    continue;
	  /* Jump backward reached implies we just went through
	     the body of a loop and matched nothing.
	     Opcode jumped to should be an on_failure_jump.
	     Just treat it like an ordinary jump.
	     For a * loop, it has pushed its failure point already;
	     if so, discard that as redundant.  */
	  if ((enum regexpcode) *p != on_failure_jump)
	    continue;
	  p++;
	  j = *p++ & 0377;
	  j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
	  p += j + 1;		/* The 1 compensates for missing ++ above */
	  if (stackp != stackb && *stackp == p)
	    stackp--;
	  continue;
	  
	case on_failure_jump:
	  j = *p++ & 0377;
	  j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
	  p++;
	  *++stackp = p + j;
	  continue;

	case start_memory:
	case stop_memory:
	  p++;
	  continue;

	case duplicate:
	  bufp->can_be_null = 1;
	  fastmap['\n'] = 1;
	case anychar:
	  for (j = 0; j < (1 << BYTEWIDTH); j++)
	    if (j != '\n')
	      fastmap[j] = 1;
	  if (bufp->can_be_null)
	    return;
	  /* Don't return; check the alternative paths
	     so we can set can_be_null if appropriate.  */
	  break;

	case wordchar:
	  for (j = 0; j < (1 << BYTEWIDTH); j++)
	    if (SYNTAX (j) == Sword)
	      fastmap[j] = 1;
	  break;

	case notwordchar:
	  for (j = 0; j < (1 << BYTEWIDTH); j++)
	    if (SYNTAX (j) != Sword)
	      fastmap[j] = 1;
	  break;

#ifdef emacs
	case syntaxspec:
	  k = *p++;
	  for (j = 0; j < (1 << BYTEWIDTH); j++)
	    if (SYNTAX (j) == (enum syntaxcode) k)
	      fastmap[j] = 1;
	  break;

	case notsyntaxspec:
	  k = *p++;
	  for (j = 0; j < (1 << BYTEWIDTH); j++)
	    if (SYNTAX (j) != (enum syntaxcode) k)
	      fastmap[j] = 1;
	  break;
#endif /* emacs */

	case charset:
	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
	      {
		if (translate)
		  fastmap[translate[j]] = 1;
		else
		  fastmap[j] = 1;
	      }
	  break;

	case charset_not:
	  /* Chars beyond end of map must be allowed */
	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
	    if (translate)
	      fastmap[translate[j]] = 1;
	    else
	      fastmap[j] = 1;

	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
	      {
		if (translate)
		  fastmap[translate[j]] = 1;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -