⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regex.c

📁 早期freebsd实现
💻 C
📖 第 1 页 / 共 3 页
字号:
/* Extended regular expression matching and search library.   Copyright (C) 1985, 1989 Free Software Foundation, Inc.This program is free software; you can redistribute it and/or modifyit under the terms of the GNU General Public License as published bythe Free Software Foundation; either version 2 of the License, or(at your option) any later version.This program is distributed in the hope that it will be useful,but WITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See theGNU General Public License for more details.You should have received a copy of the GNU General Public Licensealong with this program; if not, write to the Free SoftwareFoundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  *//* To test, compile with -Dtest. This Dtestable feature turns this into a self-contained program which reads a pattern, describes how it compiles, then reads a string and searches for it.  */#ifdef emacs/* The `emacs' switch turns on certain special matching commands that make sense only in emacs. */#include "config.h"#include "lisp.h"#include "buffer.h"#include "syntax.h"#else  /* not emacs */#define bcopy(s,d,n)	memcpy((d),(s),(n))#define bcmp(s1,s2,n)	memcmp((s1),(s2),(n))#define bzero(s,n)	memset((s),0,(n))/* Make alloca work the best possible way.  */#ifdef __GNUC__#define alloca __builtin_alloca#else#ifdef sparc#include <alloca.h>#endif#endif/* * Define the syntax stuff, so we can do the \<...\> things. */#ifndef Sword /* must be non-zero in some of the tests below... */#define Sword 1#endif#define SYNTAX(c) re_syntax_table[c]#ifdef SYNTAX_TABLEchar *re_syntax_table;#elsestatic char re_syntax_table[256];static voidinit_syntax_once (){   register int c;   static int done = 0;   if (done)     return;   bzero (re_syntax_table, sizeof re_syntax_table);   for (c = 'a'; c <= 'z'; c++)     re_syntax_table[c] = Sword;   for (c = 'A'; c <= 'Z'; c++)     re_syntax_table[c] = Sword;   for (c = '0'; c <= '9'; c++)     re_syntax_table[c] = Sword;   done = 1;}#endif /* SYNTAX_TABLE */#endif /* not emacs */#include "regex.h"/* Number of failure points to allocate space for initially, when matching.  If this number is exceeded, more space is allocated, so it is not a hard limit.  */#ifndef NFAILURES#define NFAILURES 80#endif /* NFAILURES *//* width of a byte in bits */#define BYTEWIDTH 8#ifndef SIGN_EXTEND_CHAR#define SIGN_EXTEND_CHAR(x) (x)#endifstatic int obscure_syntax = 0;/* Specify the precise syntax of regexp for compilation.   This provides for compatibility for various utilities   which historically have different, incompatible syntaxes.   The argument SYNTAX is a bit-mask containing the two bits   RE_NO_BK_PARENS and RE_NO_BK_VBAR.  */intre_set_syntax (syntax)     int syntax;{  int ret;  ret = obscure_syntax;  obscure_syntax = syntax;  return ret;}/* re_compile_pattern takes a regular-expression string   and converts it into a buffer full of byte commands for matching.  PATTERN   is the address of the pattern string  SIZE      is the length of it.  BUFP	    is a  struct re_pattern_buffer *  which points to the info	    on where to store the byte commands.	    This structure contains a  char *  which points to the	    actual space, which should have been obtained with malloc.	    re_compile_pattern may use  realloc  to grow the buffer space.  The number of bytes of commands can be found out by looking in  the  struct re_pattern_buffer  that bufp pointed to,  after re_compile_pattern returns.*/#define PATPUSH(ch) (*b++ = (char) (ch))#define PATFETCH(c) \ {if (p == pend) goto end_of_pattern; \  c = * (unsigned char *) p++; \  if (translate) c = translate[c]; }#define PATFETCH_RAW(c) \ {if (p == pend) goto end_of_pattern; \  c = * (unsigned char *) p++; }#define PATUNFETCH p--#define EXTEND_BUFFER \  { char *old_buffer = bufp->buffer; \    if (bufp->allocated == (1<<16)) goto too_big; \    bufp->allocated *= 2; \    if (bufp->allocated > (1<<16)) bufp->allocated = (1<<16); \    if (!(bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated))) \      goto memory_exhausted; \    c = bufp->buffer - old_buffer; \    b += c; \    if (fixup_jump) \      fixup_jump += c; \    if (laststart) \      laststart += c; \    begalt += c; \    if (pending_exact) \      pending_exact += c; \  }static void store_jump (), insert_jump ();char *re_compile_pattern (pattern, size, bufp)     char *pattern;     int size;     struct re_pattern_buffer *bufp;{  register char *b = bufp->buffer;  register char *p = pattern;  char *pend = pattern + size;  register unsigned c, c1;  char *p1;  unsigned char *translate = (unsigned char *) bufp->translate;  /* address of the count-byte of the most recently inserted "exactn" command.    This makes it possible to tell whether a new exact-match character    can be added to that command or requires a new "exactn" command. */       char *pending_exact = 0;  /* address of the place where a forward-jump should go    to the end of the containing expression.    Each alternative of an "or", except the last, ends with a forward-jump    of this sort. */  char *fixup_jump = 0;  /* address of start of the most recently finished expression.    This tells postfix * where to find the start of its operand. */  char *laststart = 0;  /* In processing a repeat, 1 means zero matches is allowed */  char zero_times_ok;  /* In processing a repeat, 1 means many matches is allowed */  char many_times_ok;  /* address of beginning of regexp, or inside of last \( */  char *begalt = b;  /* Stack of information saved by \( and restored by \).     Four stack elements are pushed by each \(:       First, the value of b.       Second, the value of fixup_jump.       Third, the value of regnum.       Fourth, the value of begalt.  */  int stackb[40];  int *stackp = stackb;  int *stacke = stackb + 40;  int *stackt;  /* Counts \('s as they are encountered.  Remembered for the matching \),     where it becomes the "register number" to put in the stop_memory command */  int regnum = 1;  bufp->fastmap_accurate = 0;#ifndef emacs#ifndef SYNTAX_TABLE  /*   * Initialize the syntax table.   */   init_syntax_once();#endif#endif  if (bufp->allocated == 0)    {      bufp->allocated = 28;      if (bufp->buffer)	/* EXTEND_BUFFER loses when bufp->allocated is 0 */	bufp->buffer = (char *) realloc (bufp->buffer, 28);      else	/* Caller did not allocate a buffer.  Do it for him */	bufp->buffer = (char *) malloc (28);      if (!bufp->buffer) goto memory_exhausted;      begalt = b = bufp->buffer;    }  while (p != pend)    {      if (b - bufp->buffer > bufp->allocated - 10)	/* Note that EXTEND_BUFFER clobbers c */	EXTEND_BUFFER;      PATFETCH (c);      switch (c)	{	case '$':	  if (obscure_syntax & RE_TIGHT_VBAR)	    {	      if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p != pend)		goto normal_char;	      /* Make operand of last vbar end before this `$'.  */	      if (fixup_jump)		store_jump (fixup_jump, jump, b);	      fixup_jump = 0;	      PATPUSH (endline);	      break;	    }	  /* $ means succeed if at end of line, but only in special contexts.	    If randomly in the middle of a pattern, it is a normal character. */	  if (p == pend || *p == '\n'	      || (obscure_syntax & RE_CONTEXT_INDEP_OPS)	      || (obscure_syntax & RE_NO_BK_PARENS		  ? *p == ')'		  : *p == '\\' && p[1] == ')')	      || (obscure_syntax & RE_NO_BK_VBAR		  ? *p == '|'		  : *p == '\\' && p[1] == '|'))	    {	      PATPUSH (endline);	      break;	    }	  goto normal_char;	case '^':	  /* ^ means succeed if at beg of line, but only if no preceding pattern. */	  if (laststart && p[-2] != '\n'	      && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))	    goto normal_char;	  if (obscure_syntax & RE_TIGHT_VBAR)	    {	      if (p != pattern + 1		  && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))		goto normal_char;	      PATPUSH (begline);	      begalt = b;	    }	  else	    PATPUSH (begline);	  break;	case '+':	case '?':	  if (obscure_syntax & RE_BK_PLUS_QM)	    goto normal_char;	handle_plus:	case '*':	  /* If there is no previous pattern, char not special. */	  if (!laststart && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))	    goto normal_char;	  /* If there is a sequence of repetition chars,	     collapse it down to equivalent to just one.  */	  zero_times_ok = 0;	  many_times_ok = 0;	  while (1)	    {	      zero_times_ok |= c != '+';	      many_times_ok |= c != '?';	      if (p == pend)		break;	      PATFETCH (c);	      if (c == '*')		;	      else if (!(obscure_syntax & RE_BK_PLUS_QM)		       && (c == '+' || c == '?'))		;	      else if ((obscure_syntax & RE_BK_PLUS_QM)		       && c == '\\')		{		  int c1;		  PATFETCH (c1);		  if (!(c1 == '+' || c1 == '?'))		    {		      PATUNFETCH;		      PATUNFETCH;		      break;		    }		  c = c1;		}	      else		{		  PATUNFETCH;		  break;		}	    }	  /* Star, etc. applied to an empty pattern is equivalent	     to an empty pattern.  */	  if (!laststart)	    break;	  /* Now we know whether 0 matches is allowed,	     and whether 2 or more matches is allowed.  */	  if (many_times_ok)	    {	      /* If more than one repetition is allowed,		 put in a backward jump at the end.  */	      store_jump (b, maybe_finalize_jump, laststart - 3);	      b += 3;	    }	  insert_jump (on_failure_jump, laststart, b + 3, b);	  pending_exact = 0;	  b += 3;	  if (!zero_times_ok)	    {	      /* At least one repetition required: insert before the loop		 a skip over the initial on-failure-jump instruction */	      insert_jump (dummy_failure_jump, laststart, laststart + 6, b);	      b += 3;	    }	  break;	case '.':	  laststart = b;	  PATPUSH (anychar);	  break;	case '[':	  while (b - bufp->buffer		 > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)	    /* Note that EXTEND_BUFFER clobbers c */	    EXTEND_BUFFER;	  laststart = b;	  if (*p == '^')	    PATPUSH (charset_not), p++;	  else	    PATPUSH (charset);	  p1 = p;	  PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);	  /* Clear the whole map */	  bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);	  /* Read in characters and ranges, setting map bits */	  while (1)	    {	      PATFETCH (c);	      if (c == ']' && p != p1 + 1) break;	      if (*p == '-' && p[1] != ']')		{		  PATFETCH (c1);		  PATFETCH (c1);		  while (c <= c1)		    b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++;		}	      else		{		  b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH);		}	    }	  /* Discard any bitmap bytes that are all 0 at the end of the map.	     Decrement the map-length byte too. */	  while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)	    b[-1]--;	  b += b[-1];	  break;	case '(':	  if (! (obscure_syntax & RE_NO_BK_PARENS))	    goto normal_char;	  else	    goto handle_open;	case ')':	  if (! (obscure_syntax & RE_NO_BK_PARENS))	    goto normal_char;	  else	    goto handle_close;	case '\n':	  if (! (obscure_syntax & RE_NEWLINE_OR))	    goto normal_char;	  else	    goto handle_bar;	case '|':	  if (! (obscure_syntax & RE_NO_BK_VBAR))	    goto normal_char;	  else	    goto handle_bar;        case '\\':	  if (p == pend) goto invalid_pattern;	  PATFETCH_RAW (c);	  switch (c)	    {	    case '(':	      if (obscure_syntax & RE_NO_BK_PARENS)		goto normal_backsl;	    handle_open:	      if (stackp == stacke) goto nesting_too_deep;	      if (regnum < RE_NREGS)	        {		  PATPUSH (start_memory);		  PATPUSH (regnum);	        }	      *stackp++ = b - bufp->buffer;	      *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;	      *stackp++ = regnum++;	      *stackp++ = begalt - bufp->buffer;	      fixup_jump = 0;	      laststart = 0;	      begalt = b;	      break;	    case ')':	      if (obscure_syntax & RE_NO_BK_PARENS)		goto normal_backsl;	    handle_close:	      if (stackp == stackb) goto unmatched_close;	      begalt = *--stackp + bufp->buffer;	      if (fixup_jump)		store_jump (fixup_jump, jump, b);	      if (stackp[-1] < RE_NREGS)		{		  PATPUSH (stop_memory);		  PATPUSH (stackp[-1]);		}	      stackp -= 2;	      fixup_jump = 0;	      if (*stackp)		fixup_jump = *stackp + bufp->buffer - 1;	      laststart = *--stackp + bufp->buffer;	      break;	    case '|':	      if (obscure_syntax & RE_NO_BK_VBAR)		goto normal_backsl;	    handle_bar:	      insert_jump (on_failure_jump, begalt, b + 6, b);	      pending_exact = 0;	      b += 3;	      if (fixup_jump)		store_jump (fixup_jump, jump, b);	      fixup_jump = b;	      b += 3;	      laststart = 0;	      begalt = b;	      break;#ifdef emacs	    case '=':	      PATPUSH (at_dot);	      break;	    case 's':		      laststart = b;	      PATPUSH (syntaxspec);	      PATFETCH (c);	      PATPUSH (syntax_spec_code[c]);	      break;	    case 'S':	      laststart = b;	      PATPUSH (notsyntaxspec);	      PATFETCH (c);	      PATPUSH (syntax_spec_code[c]);	      break;#endif /* emacs */	    case 'w':	      laststart = b;	      PATPUSH (wordchar);	      break;	    case 'W':	      laststart = b;	      PATPUSH (notwordchar);	      break;	    case '<':	      PATPUSH (wordbeg);	      break;	    case '>':	      PATPUSH (wordend);	      break;	    case 'b':	      PATPUSH (wordbound);	      break;	    case 'B':	      PATPUSH (notwordbound);	      break;	    case '`':	      PATPUSH (begbuf);	      break;	    case '\'':	      PATPUSH (endbuf);	      break;	    case '1':	    case '2':	    case '3':	    case '4':	    case '5':

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -