⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regexp.c

📁 操作系统源代码
💻 C
📖 第 1 页 / 共 3 页
字号:
		register int class;		register int classend;		register int c;		if (*regparse == '^') {	/* Complement of range. */			ret = regnode(ANYBUT);			regparse++;		} else			ret = regnode(ANYOF);		if (*regparse == ']' || *regparse == '-')			regc(*regparse++);		while (*regparse != '\0' && *regparse != ']') {			if (*regparse == '-') {				regparse++;				if (*regparse == ']' || *regparse == '\0')					regc((ushort) '-');				else {					class = UCHARAT(regparse-2)+1;					classend = UCHARAT(regparse);					if (class > classend+1)						FAIL("invalid [] range");					regc((ushort) 0xffff);					regc((ushort) class);					regc((ushort) classend);					regparse++;				}			} else {				if ((c = *regparse++) == '\\') {					if ((c = *regparse++) == 'n')						c = '\n';					else if (c == 't')						c = '\t';				}				regc(c);			}		}		regc((ushort) 0);		if (*regparse != ']')			FAIL("unmatched []");		regparse++;		*flagp |= HASWIDTH|SIMPLE;	}	break;  case '(':	ret = reg(1, &flags);	if (ret == NULL)		return(NULL);	*flagp |= flags&(HASWIDTH|SPSTART);	break;  case '\0':  case '|':  case ')':	FAIL("internal urp");	/* Supposed to be caught earlier. */	break;  case '?':  case '+':  case '*':	FAIL("?+* follows nothing");	break;  case '\\':	if (*regparse == '\0')		FAIL("trailing \\");	ret = regnode(EXACTLY);/*	regc(*regparse++);*/	c = *regparse++;	if (c == 'n')		c = '\n';	else if (c == 't')		c = '\t';	else if (c == 'f')		c = '\f';	else if (c == '\r')		c = '\r';	else if (c == '\b')		c = '\b';	else if (IsDigid(c)) {		d = c - '0';		if (IsDigid(*regparse)) {			d = d * 8 + *regparse++ - '0';			if (IsDigid(*regparse))				d = d * 8 + *regparse++ - '0';		}		c = d;	}	regc(c);	regc((ushort) 0);	*flagp |= HASWIDTH|SIMPLE;	break;  default: {		register int len;		register char ender;		regparse--;		len = Strcspn(regparse, META);		if (len <= 0)			FAIL("internal disaster");		ender = *(regparse+len);		if (len > 1 && ISMULT(ender))			len--;		/* Back off clear of ?+* operand. */		*flagp |= HASWIDTH;		if (len == 1)			*flagp |= SIMPLE;		ret = regnode(EXACTLY);		while (len > 0) {			regc(*regparse++);			len--;		}		regc((ushort) 0);	}	break;  }  return(ret);}IsDigid(c) ushort c;{  return '0' <= c && c <= '9';}/* - regnode - emit a node */static ushort *			/* Location. */regnode(op)ushort op;{  register ushort *ret;  register ushort *ptr;  ret = regcode;  if (ret == &regdummy) {	regsize += 3;	return(ret);  }  ptr = ret;  *ptr++ = op;  *ptr++ = '\0';		/* Null "next" pointer. */  *ptr++ = '\0';  regcode = ptr;  return(ret);}/* - regc - emit (if appropriate) a byte of code */static voidregc(b)ushort b;{  if (regcode != &regdummy)	*regcode++ = b;  else	regsize++;}/* - reginsert - insert an operator in front of already-emitted operand * * Means relocating the operand. */static voidreginsert(op, opnd)ushort op;ushort *opnd;{  register ushort *src;  register ushort *dst;  register ushort *place;  if (regcode == &regdummy) {	regsize += 3;	return;  }  src = regcode;  regcode += 3;  dst = regcode;  while (src > opnd)	*--dst = *--src;  place = opnd;		/* Op node, where operand used to be. */  *place++ = op;  *place++ = '\0';  *place++ = '\0';}/* - regtail - set the next-pointer at the end of a node chain */static voidregtail(p, val)ushort *p;ushort *val;{  register ushort *scan;  register ushort *temp;  register int offset;  if (p == &regdummy)	return;  /* Find last node. */  scan = p;  for (;;) {	temp = regnext(scan);	if (temp == NULL)		break;	scan = temp;  }  if (OP(scan) == BACK)	offset = scan - val;  else	offset = val - scan;  *(scan+1) = (offset>>8)&0377;  *(scan+2) = offset&0377;}/* - regoptail - regtail on operand of first argument; nop if operandless */static voidregoptail(p, val)ushort *p;ushort *val;{  /* "Operandless" and "op != BRANCH" are synonymous in practice. */  if (p == NULL || p == &regdummy || OP(p) != BRANCH)	return;  regtail(OPERAND(p), val);}/* * regexec and friends *//* * Global work variables for regexec(). */static ushort *reginput;		/* String-input pointer. */static ushort *regbol;		/* Beginning of input, for ^ check. */static ushort **regstartp;	/* Pointer to startp array. */static ushort **regendp;		/* Ditto for endp. *//* * Forwards. */STATIC int regtry();STATIC int regmatch();STATIC int regrepeat();#ifdef DEBUGint regnarrate = 0;void regdump();STATIC char *regprop();#endif/* - regexec - match a regexp against a string */intregexec(prog, string, bolflag)register regexp *prog;register ushort *string;int bolflag;{  register ushort *s;  extern ushort *Strchr();  /* Be paranoid... */  if (prog == NULL || string == NULL) {	regerror("NULL parameter");	return(0);  }  /* Check validity of program. */  if (prog->program[0] != MAGIC) {	regerror("corrupted program");	return(0);  }  /* If there is a "must appear" string, look for it. */  if (prog->regmust != NULL) {	s = string;	while ((s = Strchr(s, prog->regmust[0])) != NULL) {		if (Strncmp(s, prog->regmust, prog->regmlen) == 0)			break;	/* Found it. */		s++;	}	if (s == NULL)	/* Not present. */		return(0);  }  /* Mark beginning of line for ^ . */  if(bolflag)	regbol = string;  else	regbol = NULL;  /* Simplest case:  anchored match need be tried only once. */  if (prog->reganch)	return(regtry(prog, string));  /* Messy cases:  unanchored match. */  s = string;  if (prog->regstart != '\0') {	/* We know what char it must start with. */	while ((s = Strchr(s, prog->regstart)) != NULL) {		if (regtry(prog, s))			return(1);		s++;	}}  else	/* We don't -- general case. */	do {		if (regtry(prog, s))			return(1);	} while (*s++ != '\0');  /* Failure. */  return(0);}/* - regtry - try match at specific point */static int			/* 0 failure, 1 success */regtry(prog, string)regexp *prog;ushort *string;{  register int i;  register ushort **sp;  register ushort **ep;  reginput = string;  regstartp = prog->startp;  regendp = prog->endp;  sp = prog->startp;  ep = prog->endp;  for (i = NSUBEXP; i > 0; i--) {	*sp++ = NULL;	*ep++ = NULL;  }  if (regmatch(prog->program + 1)) {	prog->startp[0] = string;	prog->endp[0] = reginput;	return(1);  } else	return(0);}/* - regmatch - main matching routine * * Conceptually the strategy is simple:  check to see whether the current * node matches, call self recursively to see whether the rest matches, * and then act accordingly.  In practice we make some effort to avoid * recursion, in particular by going through "ordinary" nodes (that don't * need to know whether the rest of the match failed) by a loop instead of * by recursion. */static int			/* 0 failure, 1 success */regmatch(prog)ushort *prog;{  register ushort *scan;	/* Current node. */  ushort *next;		/* Next node. */  extern ushort *Strchr();  scan = prog;#ifdef DEBUG  if (scan != NULL && regnarrate)	fprintf(stderr, "%s(\n", regprop(scan));#endif  while (scan != NULL) {#ifdef DEBUG	if (regnarrate)		fprintf(stderr, "%s...\n", regprop(scan));#endif	next = regnext(scan);	switch ((int) OP(scan)) {	case BOL:		if (reginput != regbol)			return(0);		break;	case EOL:		if (*reginput != '\0')			return(0);		break;	case ANY:		if (*reginput == '\0')			return(0);		reginput++;		break;	case EXACTLY: {			register int len;			register ushort *opnd;			opnd = OPERAND(scan);			/* Inline the first character, for speed. */			if (*opnd != *reginput)				return(0);			len = Strlen(opnd);			if (len > 1 && Strncmp(opnd, reginput, len) != 0)				return(0);			reginput += len;		}		break;	case ANYOF:		if (*reginput == '\0' || isthere(OPERAND(scan), *reginput) == 0)			return(0);		reginput++;		break;	case ANYBUT:		if (*reginput == '\0' || isthere(OPERAND(scan), *reginput) != 0)			return(0);		reginput++;		break;	case NOTHING:		break;	case BACK:		break;	case OPEN+1:	case OPEN+2:	case OPEN+3:	case OPEN+4:	case OPEN+5:	case OPEN+6:	case OPEN+7:	case OPEN+8:	case OPEN+9: {			register int no;			register ushort *save;			no = OP(scan) - OPEN;			save = reginput;			if (regmatch(next)) {				/*				 * Don't set startp if some later				 * invocation of the same parentheses				 * already has.				 */				if (regstartp[no] == NULL)					regstartp[no] = save;				return(1);			} else				return(0);		}		break;	case CLOSE+1:	case CLOSE+2:	case CLOSE+3:	case CLOSE+4:	case CLOSE+5:	case CLOSE+6:	case CLOSE+7:	case CLOSE+8:	case CLOSE+9: {			register int no;			register ushort *save;			no = OP(scan) - CLOSE;			save = reginput;			if (regmatch(next)) {				/*				 * Don't set endp if some later				 * invocation of the same parentheses				 * already has.				 */				if (regendp[no] == NULL)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -