⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regexp.c

📁 操作系统源代码
💻 C
📖 第 1 页 / 共 2 页
字号:
  *ptr++ = '\0';		/* Null "next" pointer. */  *ptr++ = '\0';  regcode = ptr;  return(ret);}/* - regc - emit (if appropriate) a byte of code */PRIVATE void regc(b)char b;{  if (regcode != &regdummy)	*regcode++ = b;  else	regsize++;}/* - reginsert - insert an operator in front of already-emitted operand * * Means relocating the operand. */PRIVATE void reginsert(op, opnd)char op;char *opnd;{  register char *src;  register char *dst;  register char *place;  if (regcode == &regdummy) {	regsize += 3;	return;  }  src = regcode;  regcode += 3;  dst = regcode;  while (src > opnd) *--dst = *--src;  place = opnd;			/* Op node, where operand used to be. */  *place++ = op;  *place++ = '\0';  *place++ = '\0';}/* - regtail - set the next-pointer at the end of a node chain */PRIVATE void regtail(p, val)char *p;char *val;{  register char *scan;  register char *temp;  register int offset;  if (p == &regdummy) return;  /* Find last node. */  scan = p;  for (;;) {	temp = regnext(scan);	if (temp == (char *)NULL) break;	scan = temp;  }  if (OP(scan) == BACK)	offset = scan - val;  else	offset = val - scan;  *(scan + 1) = (offset >> 8) & 0377;  *(scan + 2) = offset & 0377;}/* - regoptail - regtail on operand of first argument; nop if operandless */PRIVATE void regoptail(p, val)char *p;char *val;{  /* "Operandless" and "op != BRANCH" are synonymous in practice. */  if (p == (char *)NULL || p == &regdummy || OP(p) != BRANCH) return;  regtail(OPERAND(p), val);}/* regexec and friends *//* Global work variables for regexec(). */PRIVATE char *reginput;		/* String-input pointer. */PRIVATE char *regbol;		/* Beginning of input, for ^ check. */PRIVATE char **regstartp;	/* Pointer to startp array. */PRIVATE char **regendp;		/* Ditto for endp. *//* Forwards. */STATIC _PROTOTYPE( int regtry, (regexp *prog, char *string)		);STATIC _PROTOTYPE( int regmatch, (char *prog)				);STATIC _PROTOTYPE( int regrepeat, (char *p)				);#ifdef DEBUGint regnarrate = 0;void regdump();STATIC _PROTOTYPE( char *regprop, (char *op)				);#endif/* - regexec - match a regexp against a string */int regexec(prog, string, bolflag)register regexp *prog;register char *string;int bolflag;{  register char *s;  /* Be paranoid... */  if (prog == (regexp *)NULL || string == (char *)NULL) {	regerror("NULL parameter");	return(0);  }  /* Check validity of program. */  if (UCHARAT(prog->program) != MAGIC) {	regerror("corrupted program");	return(0);  }  /* If there is a "must appear" string, look for it. */  if (prog->regmust != (char *)NULL) {	s = string;	while ((s = strchr(s, prog->regmust[0])) != (char *)NULL) {		if (strncmp(s, prog->regmust, prog->regmlen) == 0)			break;	/* Found it. */		s++;	}	if (s == (char *)NULL)		/* Not present. */		return(0);  }  /* Mark beginning of line for ^ . */  if (bolflag)	regbol = string;  else	regbol = (char *)NULL;  /* Simplest case:  anchored match need be tried only once. */  if (prog->reganch) return(regtry(prog, string));  /* Messy cases:  unanchored match. */  s = string;  if (prog->regstart != '\0') 	/* We know what char it must start with. */	while ((s = strchr(s, prog->regstart)) != (char *)NULL) {		if (regtry(prog, s)) return(1);		s++;	}  else	/* We don't -- general case. */	do {		if (regtry(prog, s)) return(1);	} while (*s++ != '\0');  /* Failure. */  return(0);}/* - regtry - try match at specific point */PRIVATE int regtry(prog, string)   /* 0 failure, 1 success */regexp *prog;char *string;{  register int i;  register char **sp;  register char **ep;  reginput = string;  regstartp = prog->startp;  regendp = prog->endp;  sp = prog->startp;  ep = prog->endp;  for (i = NSUBEXP; i > 0; i--) {	*sp++ = (char *)NULL;	*ep++ = (char *)NULL;  }  if (regmatch(prog->program + 1)) {	prog->startp[0] = string;	prog->endp[0] = reginput;	return(1);  } else	return(0);}/* - regmatch - main matching routine * * Conceptually the strategy is simple:  check to see whether the current * node matches, call self recursively to see whether the rest matches, * and then act accordingly.  In practice we make some effort to avoid * recursion, in particular by going through "ordinary" nodes (that don't * need to know whether the rest of the match failed) by a loop instead of * by recursion. */PRIVATE int regmatch(prog)	/* 0 failure, 1 success */ char *prog;{  register char *scan;		/* Current node. */  char *next;			/* Next node. */  scan = prog;#ifdef DEBUG  if (scan != (char *)NULL && regnarrate) fprintf(stderr, "%s(\n", regprop(scan));#endif  while (scan != (char *)NULL) {#ifdef DEBUG	if (regnarrate) fprintf(stderr, "%s...\n", regprop(scan));#endif	next = regnext(scan);	switch (OP(scan)) {	    case BOL:		if (reginput != regbol) return(0);		break;	    case EOL:		if (*reginput != '\0') return(0);		break;	    case ANY:		if (*reginput == '\0') return(0);		reginput++;		break;	    case EXACTLY:{			register int len;			register char *opnd;			opnd = OPERAND(scan);			/* Inline the first character, for speed. */			if (*opnd != *reginput) return(0);			len = strlen(opnd);			if (len > 1 && strncmp(opnd, reginput, len) != 0)				return(0);			reginput += len;		}		break;	    case ANYOF:		if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == (char *)NULL)			return(0);		reginput++;		break;	    case ANYBUT:		if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != (char *)NULL)			return(0);		reginput++;		break;	    case NOTHING:		break;	    case BACK:		break;	    case OPEN + 1:	    case OPEN + 2:	    case OPEN + 3:	    case OPEN + 4:	    case OPEN + 5:	    case OPEN + 6:	    case OPEN + 7:	    case OPEN + 8:	    case OPEN + 9:{			register int no;			register char *save;			no = OP(scan) - OPEN;			save = reginput;			if (regmatch(next)) {				/* Don't set startp if some later				 * invocation of the same parentheses				 * already has. */				if (regstartp[no] == (char *)NULL)					regstartp[no] = save;				return(1);			} else				return(0);		}		break;	    case CLOSE + 1:	    case CLOSE + 2:	    case CLOSE + 3:	    case CLOSE + 4:	    case CLOSE + 5:	    case CLOSE + 6:	    case CLOSE + 7:	    case CLOSE + 8:	    case CLOSE + 9:{			register int no;			register char *save;			no = OP(scan) - CLOSE;			save = reginput;			if (regmatch(next)) {				/* Don't set endp if some later				 * invocation of the same parentheses				 * already has. */				if (regendp[no] == (char *)NULL) regendp[no] = save;				return(1);			} else				return(0);		}		break;	    case BRANCH:{			register char *save;			if (OP(next) != BRANCH)	/* No choice. */				next = OPERAND(scan);	/* Avoid recursion. */			else {				do {					save = reginput;					if (regmatch(OPERAND(scan)))						return(1);					reginput = save;					scan = regnext(scan);				} while (scan != (char *)NULL && OP(scan) == BRANCH);				return(0);				/* NOTREACHED */			}		}		break;	    case STAR:	    case PLUS:{			register char nextch;			register int no;			register char *save;			register int min;			/* Lookahead to avoid useless match attempts			 * when we know what character comes next. */			nextch = '\0';			if (OP(next) == EXACTLY) nextch = *OPERAND(next);			min = (OP(scan) == STAR) ? 0 : 1;			save = reginput;			no = regrepeat(OPERAND(scan));			while (no >= min) {				/* If it could work, try it. */				if (nextch == '\0' || *reginput == nextch)					if (regmatch(next)) return(1);				/* Couldn't or didn't -- back up. */				no--;				reginput = save + no;			}			return(0);		}		break;	    case END:		return(1);	/* Success! */		break;	    default:		regerror("memory corruption");		return(0);		break;	}	scan = next;  }  /* We get here only if there's trouble -- normally "case END" is the   * terminating point. */  regerror("corrupted pointers");  return(0);}/* - regrepeat - repeatedly match something simple, report how many */PRIVATE int regrepeat(p)char *p;{  register int count = 0;  register char *scan;  register char *opnd;  scan = reginput;  opnd = OPERAND(p);  switch (OP(p)) {      case ANY:	count = strlen(scan);	scan += count;	break;      case EXACTLY:	while (*opnd == *scan) {		count++;		scan++;	}	break;      case ANYOF:	while (*scan != '\0' && strchr(opnd, *scan) != (char *)NULL) {		count++;		scan++;	}	break;      case ANYBUT:	while (*scan != '\0' && strchr(opnd, *scan) == (char *)NULL) {		count++;		scan++;	}	break;      default:			/* Oh dear.  Called inappropriately. */	regerror("internal foulup");	count = 0;		/* Best compromise. */	break;  }  reginput = scan;  return(count);}/* - regnext - dig the "next" pointer out of a node */PRIVATE char *regnext(p)register char *p;{  register int offset;  if (p == &regdummy) return((char *)NULL);  offset = NEXT(p);  if (offset == 0) return((char *)NULL);  if (OP(p) == BACK)	return(p - offset);  else	return(p + offset);}#ifdef DEBUGSTATIC char *regprop();/* - regdump - dump a regexp onto stdout in vaguely comprehensible form */void regdump(r)regexp *r;{  register char *s;  register char op = EXACTLY;	/* Arbitrary non-END op. */  register char *next;  s = r->program + 1;  while (op != END) {		/* While that wasn't END last time... */	op = OP(s);	printf("%2d%s", (int) (s - r->program), regprop(s));	/* Where, what. */	next = regnext(s);	if (next == (char *)NULL)	/* Next ptr. */		printf("(0)");	else		printf("(%d)", (int) (s - r->program) + (int) (next - s));	s += 3;	if (op == ANYOF || op == ANYBUT || op == EXACTLY) {		/* Literal string, where present. */		while (*s != '\0') {			putchar(*s);			s++;		}		s++;	}	putchar('\n');  }  /* Header fields of interest. */  if (r->regstart != '\0') printf("start `%c' ", r->regstart);  if (r->reganch) printf("anchored ");  if (r->regmust != (char *)NULL) printf("must have \"%s\"", r->regmust);  printf("\n");}/* - regprop - printable representation of opcode */PRIVATE char *regprop(op)char *op;{  register char *p;  PRIVATE char buf[50];  (void) strcpy(buf, ":");  switch (OP(op)) {      case BOL:	p = "BOL";	  	break;      case EOL:	p = "EOL";	  	break;      case ANY:	p = "ANY";	  	break;      case ANYOF:	p = "ANYOF";	  	break;      case ANYBUT:	p = "ANYBUT";	  	break;      case BRANCH:	p = "BRANCH";	  	break;      case EXACTLY:	p = "EXACTLY";	  	break;      case NOTHING:	p = "NOTHING";	  	break;      case BACK:	p = "BACK";	  	break;      case END:	p = "END";	  	break;      case OPEN + 1:      case OPEN + 2:      case OPEN + 3:      case OPEN + 4:      case OPEN + 5:      case OPEN + 6:      case OPEN + 7:      case OPEN + 8:      case OPEN + 9:	sprintf(buf + strlen(buf), "OPEN%d", OP(op) - OPEN);	p = (char *)NULL;	break;      case CLOSE + 1:      case CLOSE + 2:      case CLOSE + 3:      case CLOSE + 4:      case CLOSE + 5:      case CLOSE + 6:      case CLOSE + 7:      case CLOSE + 8:      case CLOSE + 9:	sprintf(buf + strlen(buf), "CLOSE%d", OP(op) - CLOSE);	p = (char *)NULL;	break;      case STAR:	p = "STAR";	  	break;      case PLUS:	p = "PLUS";	  	break;      default:	regerror("corrupted opcode"); p = (char *) NULL; break;  }  if (p != (char *)NULL) (void) strcat(buf, p);  return(buf);}#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -