📄 regex.c

📁 linux下的E_MAIL客户端源码
💻 C
📖 第 1 页 / 共 3 页
字号:
	  break;	case Ropenpar:	  SET_LEVEL_START;	  if (next_register < RE_NREGS)	    {	      bufp->uses_registers = 1;	      ALLOC(2);	      STORE(Cstart_memory);	      STORE(next_register);	      open_registers[num_open_registers++] = next_register;	      next_register++;	    }	  paren_depth++;	  PUSH_LEVEL_STARTS;	  current_level = 0;	  SET_LEVEL_START;	  break;	case Rclosepar:	  if (paren_depth <= 0)	    goto parenthesis_error;	  POP_LEVEL_STARTS;	  current_level = regexp_precedences[Ropenpar];	  paren_depth--;	  if (paren_depth < num_open_registers)	    {	      bufp->uses_registers = 1;	      ALLOC(2);	      STORE(Cend_memory);	      num_open_registers--;	      STORE(open_registers[num_open_registers]);	    }	  break;	case Rmemory:	  if (ch == '0')	    goto bad_match_register;	  assert(ch >= '0' && ch <= '9');	  bufp->uses_registers = 1;	  opcode = Cmatch_memory;	  ch -= '0';	  goto store_opcode_and_arg;	case Rextended_memory:	  NEXTCHAR(ch);	  if (ch < '0' || ch > '9')	    goto bad_match_register;	  NEXTCHAR(a);	  if (a < '0' || a > '9')	    goto bad_match_register;	  ch = 10 * (a - '0') + ch - '0';	  if (ch <= 0 || ch >= RE_NREGS)	    goto bad_match_register;	  bufp->uses_registers = 1;	  opcode = Cmatch_memory;	  goto store_opcode_and_arg;	case Ropenset:	  {	    int complement,prev,offset,range,firstchar;	    	    SET_LEVEL_START;	    ALLOC(1+256/8);	    STORE(Cset);	    offset = pattern_offset;	    for (a = 0; a < 256/8; a++)	      STORE(0);	    NEXTCHAR(ch);	    if (translate)	      ch = translate[(unsigned char)ch];	    if (ch == '\136')	      {		complement = 1;		NEXTCHAR(ch);		if (translate)		  ch = translate[(unsigned char)ch];	      }	    else	      complement = 0;	    prev = -1;	    range = 0;	    firstchar = 1;	    while (ch != '\135' || firstchar)	      {		firstchar = 0;		if (regexp_ansi_sequences && ch == '\134')		  {		    NEXTCHAR(ch);		    ANSI_TRANSLATE(ch);		  }		if (range)		  {		    for (a = prev; a <= (int)ch; a++)		      SETBIT(pattern, offset, a);		    prev = -1;		    range = 0;		  }		else		  if (prev != -1 && ch == '-')		    range = 1;		  else		    {		      SETBIT(pattern, offset, ch);		      prev = ch;		    }		NEXTCHAR(ch);		if (translate)		  ch = translate[(unsigned char)ch];	      }	    if (range)	      SETBIT(pattern, offset, '-');	    if (complement)	      {		for (a = 0; a < 256/8; a++)		  pattern[offset+a] ^= 0xff;	      }	    break;	  }	case Rbegbuf:	  opcode = Cbegbuf;	  goto store_opcode;	case Rendbuf:	  opcode = Cendbuf;	  goto store_opcode;	case Rwordchar:	  opcode = Csyntaxspec;	  ch = Sword;	  goto store_opcode_and_arg;	case Rnotwordchar:	  opcode = Cnotsyntaxspec;	  ch = Sword;	  goto store_opcode_and_arg;	case Rwordbeg:	  opcode = Cwordbeg;	  goto store_opcode;	case Rwordend:	  opcode = Cwordend;	  goto store_opcode;	case Rwordbound:	  opcode = Cwordbound;	  goto store_opcode;	case Rnotwordbound:	  opcode = Cnotwordbound;	  goto store_opcode;#ifdef emacs	case Remacs_at_dot:	  opcode = Cemacs_at_dot;	  goto store_opcode;	case Remacs_syntaxspec:	  NEXTCHAR(ch);	  if (translate)	    ch = translate[(unsigned char)ch];	  opcode = Csyntaxspec;	  ch = syntax_spec_code[(unsigned char)ch];	  goto store_opcode_and_arg;	case Remacs_notsyntaxspec:	  NEXTCHAR(ch);	  if (translate)	    ch = translate[(unsigned char)ch];	  opcode = Cnotsyntaxspec;	  ch = syntax_spec_code[(unsigned char)ch];	  goto store_opcode_and_arg;#endif /* emacs */	default:	  abort();	}      beginning_context = (op == Ropenpar || op == Ror);    }  if (starts_base != 0)    goto parenthesis_error;  assert(num_jumps == 0);  ALLOC(1);  STORE(Cend);  SET_FIELDS;  return NULL; op_error:  SET_FIELDS;  return "Badly placed special character"; bad_match_register:  SET_FIELDS;  return "Bad match register number"; hex_error:  SET_FIELDS;  return "Bad hexadecimal number"; parenthesis_error:  SET_FIELDS;  return "Badly placed parenthesis"; out_of_memory:  SET_FIELDS;  return "Out of memory"; ends_prematurely:  SET_FIELDS;  return "Regular expression ends prematurely"; too_complex:  SET_FIELDS;  return "Regular expression too complex";}#undef CHARAT#undef NEXTCHAR#undef GETHEX#undef ALLOC#undef STORE#undef CURRENT_LEVEL_START#undef SET_LEVEL_START#undef PUSH_LEVEL_STARTS#undef POP_LEVEL_STARTS#undef PUT_ADDR#undef INSERT_JUMP#undef SETBIT#undef SET_FIELDSstatic void hre_compile_fastmap_aux(code, pos, visited, can_be_null, fastmap)char *code, *visited, *can_be_null, *fastmap;int pos;{  int a, b, syntaxcode;  if (visited[pos])    return;  /* we have already been here */  visited[pos] = 1;  for (;;)    switch (code[pos++])      {      case Cend:	*can_be_null = 1;	return;      case Cbol:      case Cbegbuf:      case Cendbuf:      case Cwordbeg:      case Cwordend:      case Cwordbound:      case Cnotwordbound:#ifdef emacs      case Cemacs_at_dot:#endif /* emacs */	break;      case Csyntaxspec:	syntaxcode = code[pos++];	for (a = 0; a < 256; a++)	  if (SYNTAX(a) == syntaxcode)	    fastmap[a] = 1;	return;      case Cnotsyntaxspec:	syntaxcode = code[pos++];	for (a = 0; a < 256; a++)	  if (SYNTAX(a) != syntaxcode)	    fastmap[a] = 1;	return;      case Ceol:	fastmap['\n'] = 1;	if (*can_be_null == 0)	  *can_be_null = 2;  /* can match null, but only at end of buffer*/	return;      case Cset:	for (a = 0; a < 256/8; a++)	  if (code[pos + a] != 0)	    for (b = 0; b < 8; b++)	      if (code[pos + a] & (1 << b))		fastmap[(a << 3) + b] = 1;	pos += 256/8;	return;      case Cexact:	fastmap[(unsigned char)code[pos]] = 1;	return;      case Canychar:	for (a = 0; a < 256; a++)	  if (a != '\n')	    fastmap[a] = 1;	return;      case Cstart_memory:      case Cend_memory:	pos++;	break;      case Cmatch_memory:	/* should this ever happen for sensible patterns??? */	*can_be_null = 1;	return;      case Cjump:      case Cdummy_failure_jump:      case Cupdate_failure_jump:      case Cstar_jump:	a = (unsigned char)code[pos++];	a |= (unsigned char)code[pos++] << 8;	pos += (int)(short)a;	if (visited[pos])	  {	    /* argh... the regexp contains empty loops.  This is not	       good, as this may cause a failure stack overflow when	       matching.  Oh well. */	    /* this path leads nowhere; pursue other paths. */	    return;	  }	visited[pos] = 1;	break;      case Cfailure_jump:	a = (unsigned char)code[pos++];	a |= (unsigned char)code[pos++] << 8;	a = pos + (int)(short)a;	hre_compile_fastmap_aux(code, a, visited, can_be_null, fastmap);	break;      default:	abort();  /* probably some opcode is missing from this switch */	/*NOTREACHED*/      }}static int re_do_compile_fastmap(buffer, used, pos, can_be_null, fastmap)char *buffer, *fastmap, *can_be_null;int used, pos;{  char small_visited[512], *visited;  if (used <= sizeof(small_visited))    visited = small_visited;  else    {      visited = malloc(used);      if (!visited)	return 0;    }  *can_be_null = 0;  memset(fastmap, 0, 256);  memset(visited, 0, used);  hre_compile_fastmap_aux(buffer, pos, visited, can_be_null, fastmap);  if (visited != small_visited)    free(visited);  return 1;}void hre_compile_fastmap(bufp)regexp_t bufp;{  if (!bufp->fastmap || bufp->fastmap_accurate)    return;  assert(bufp->used > 0);  if (!re_do_compile_fastmap(bufp->buffer, bufp->used, 0, &bufp->can_be_null,			     bufp->fastmap))    return;  if (bufp->buffer[0] == Cbol)    bufp->anchor = 1;   /* begline */  else    if (bufp->buffer[0] == Cbegbuf)      bufp->anchor = 2; /* begbuf */    else      bufp->anchor = 0; /* none */  bufp->fastmap_accurate = 1;}#define INITIAL_FAILURES  128  /* initial # failure points to allocate */#define MAX_FAILURES     4100  /* max # of failure points before failing */int hre_match_2(bufp, string1, size1, string2, size2, pos, regs, mstop)regexp_t bufp;char *string1, *string2;int size1, size2, pos, mstop;regexp_registers_t regs;{  struct failure_point { char *text, *partend, *code; }    *failure_stack_start, *failure_sp, *failure_stack_end,    initial_failure_stack[INITIAL_FAILURES];  char *code, *translate, *text, *textend, *partend, *part_2_end;  char *regstart_text[RE_NREGS], *regstart_partend[RE_NREGS];  char *regend_text[RE_NREGS], *regend_partend[RE_NREGS];  int a, b, ch, reg, regch, match_end;  char *regtext, *regpartend, *regtextend;#define PREFETCH					\  MACRO_BEGIN						\    if (text == partend)				\      {							\	if (text == textend)				\	  goto fail;					\	text = string2;					\	partend = part_2_end;				\      }							\  MACRO_END#define NEXTCHAR(var)				\  MACRO_BEGIN					\    PREFETCH;					\    (var) = (unsigned char)*text++;		\    if (translate)				\      (var) = (unsigned char)translate[(var)];	\  MACRO_END  assert(pos >= 0 && size1 >= 0 && size2 >= 0 && mstop >= 0);  assert(mstop <= size1 + size2);  assert(pos <= mstop);  if (pos <= size1)    {      text = string1 + pos;      if (mstop <= size1)	{	  partend = string1 + mstop;	  textend = partend;	}      else	{	  partend = string1 + size1;	  textend = string2 + mstop - size1;	}      part_2_end = string2 + mstop - size1;    }  else    {      text = string2 + pos - size1;      partend = string2 + mstop - size1;      textend = partend;      part_2_end = partend;    }  if (bufp->uses_registers && regs != NULL)    for (a = 0; a < RE_NREGS; a++)      regend_text[a] = NULL;  code = bufp->buffer;  translate = bufp->translate;  failure_stack_start = failure_sp = initial_failure_stack;  failure_stack_end = initial_failure_stack + INITIAL_FAILURES;#if 0  /* hre_search_2 has already done this, and otherwise we get little benefit     from this.  So I'll leave this out. */  if (bufp->fastmap_accurate && !bufp->can_be_null &&      text != textend &&      !bufp->fastmap[translate ?		     (unsigned char)translate[(unsigned char)*text] :		     (unsigned char)*text])    return -1;  /* it can't possibly match */#endif continue_matching:  for (;;)    {      switch (*code++)	{	case Cend:	  if (partend != part_2_end)	    match_end = text - string1;	  else	    match_end = text - string2 + size1;	  if (regs)	    {	      regs->start[0] = pos;	      regs->end[0] = match_end;	      if (!bufp->uses_registers)		{		  for (a = 1; a < RE_NREGS; a++)		    {		      regs->start[a] = -1;		      regs->end[a] = -1;		    }		}	      else		{		  for (a = 1; a < RE_NREGS; a++)		    {		      if (regend_text[a] == NULL)			{			  regs->start[a] = -1;			  regs->end[a] = -1;			  continue;			}		      if (regstart_partend[a] != part_2_end)			regs->start[a] = regstart_text[a] - string1;		      else			regs->start[a] = regstart_text[a] - string2 + size1;		      if (regend_partend[a] != part_2_end)			regs->end[a] = regend_text[a] - string1;		      else			regs->end[a] = regend_text[a] - string2 + size1;		    }		}	    }	  if (failure_stack_start != initial_failure_stack)	    free((char *)failure_stack_start);	  return match_end - pos;	case Cbol:	  if (text == string1 || text[-1] == '\n') /* text[-1] always valid */	    break;	  goto fail;	case Ceol:	  if (text == string2 + size2 ||	      (text == string1 + size1 ?	       (size2 == 0 || *string2 == '\n') :	       *text == '\n'))	    break;	  goto fail;	case Cset:	  NEXTCHAR(ch);	  if (code[ch/8] & (1<<(ch & 7)))	    {	      code += 256/8;	      break;	    }	  goto fail;	case Cexact:	  NEXTCHAR(ch);	  if (ch != (unsigned char)*code++)	    goto fail;	  break;	case Canychar:	  NEXTCHAR(ch);	  if (ch == '\n')	    goto fail;	  break;	case Cstart_memory:	  reg = *code++;	  regstart_text[reg] = text;	  regstart_partend[reg] = partend;	  break;	case Cend_memory:	  reg = *code++;	  regend_text[reg] = text;	  regend_partend[reg] = partend;	  break;	case Cmatch_memory:	  reg = *code++;	  if (regend_text[reg] == NULL)	    goto fail;  /* or should we just match nothing? */	  regtext = regstart_text[reg];	  regtextend = regend_text[reg];	  if (regstart_partend[reg] == regend_partend[reg])	    regpartend = regtextend;	  else	    regpartend = string1 + size1;	  	  for (;regtext != regtextend;)	    {	      NEXTCHAR(ch);	      if (regtext == regpartend)		regtext = string2;	      regch = (unsigned char)*regtext++;	      if (translate)		regch = (unsigned char)translate[regch];	      if (regch != ch)		goto fail;	    }	  break;	case Cstar_jump:	  /* star is coded as:	       1: failure_jump 2	          ... code for operand of star		  star_jump 1	       2: ... code after star	     We change the star_jump to update_failure_jump if we can determine	     that it is safe to do so; otherwise we change it to an ordinary	     jump.	     plus is coded as	          jump 2	       1: failure_jump 3	       2: ... code for operand of plus	          star_jump 1	       3: ... code after plus	     For star_jump considerations this is processed identically	     to star. */	  a = (unsigned char)*code++;	  a |= (unsigned char)*code++ << 8;	  a = (int)(short)a;	  {	    char map[256], can_be_null;	    char *p1, *p2;	    p1 = code + a + 3; /* skip the failure_jump */	    assert(p1[-3] == Cfailure_jump);
💿 文件大小 1827 K
👤 上传用户 xiaoyunwang
📂 所属分类 Linux/Unix编程
🏷️ 相关标签

#E_MAIL #linux #源码
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -