⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hsregex.c

📁 hmmer源程序
💻 C
📖 第 1 页 / 共 3 页
字号:
	*flagp |= flags&SPSTART;	while (*cp->regparse == '|') {		cp->regparse++;		br = regbranch(cp, &flags);		if (br == NULL)			return(NULL);		regtail(cp, ret, br);	/* BRANCH -> BRANCH. */		*flagp &= ~(~flags&HASWIDTH);		*flagp |= flags&SPSTART;	}	/* Make a closing node, and hook it on the end. */	ender = regnode(cp, (paren) ? CLOSE+parno : END);	regtail(cp, ret, ender);	/* Hook the tails of the branches to the closing node. */	for (br = ret; br != NULL; br = regnext(br))		regoptail(cp, br, ender);	/* Check for proper termination. */	if (paren && *cp->regparse++ != ')') {		FAIL("unterminated ()");	} else if (!paren && *cp->regparse != '\0') {		if (*cp->regparse == ')') {			FAIL("unmatched ()");		} else			FAIL("internal error: junk on end");		/* NOTREACHED */	}	return(ret);}/* - regbranch - one alternative of an | operator * * Implements the concatenation operator. */static char *regbranch(cp, flagp)register struct comp *cp;int *flagp;{	register char *ret;	register char *chain;	register char *latest;	int flags;	register int c;	*flagp = WORST;				/* Tentatively. */	ret = regnode(cp, BRANCH);	chain = NULL;	while ((c = *cp->regparse) != '\0' && c != '|' && c != ')') {		latest = regpiece(cp, &flags);		if (latest == NULL)			return(NULL);		*flagp |= flags&HASWIDTH;		if (chain == NULL)		/* First piece. */			*flagp |= flags&SPSTART;		else			regtail(cp, chain, latest);		chain = latest;	}	if (chain == NULL)			/* Loop ran zero times. */		(void) regnode(cp, NOTHING);	return(ret);}/* - regpiece - something followed by possible [*+?] * * Note that the branching code sequences used for ? and the general cases * of * and + are somewhat optimized:  they use the same NOTHING node as * both the endmarker for their branch list and the body of the last branch. * It might seem that this node could be dispensed with entirely, but the * endmarker role is not redundant. */static char *regpiece(cp, flagp)register struct comp *cp;int *flagp;{	register char *ret;	register char op;	register char *next;	int flags;	ret = regatom(cp, &flags);	if (ret == NULL)		return(NULL);	op = *cp->regparse;	if (!ISREPN(op)) {		*flagp = flags;		return(ret);	}	if (!(flags&HASWIDTH) && op != '?')		FAIL("*+ operand could be empty");	switch (op) {	case '*':	*flagp = WORST|SPSTART;			break;	case '+':	*flagp = WORST|SPSTART|HASWIDTH;	break;	case '?':	*flagp = WORST;				break;	}	if (op == '*' && (flags&SIMPLE))		reginsert(cp, STAR, ret);	else if (op == '*') {		/* Emit x* as (x&|), where & means "self". */		reginsert(cp, BRANCH, ret);		/* Either x */		regoptail(cp, ret, regnode(cp, BACK));	/* and loop */		regoptail(cp, ret, ret);		/* back */		regtail(cp, ret, regnode(cp, BRANCH));	/* or */		regtail(cp, ret, regnode(cp, NOTHING));	/* null. */	} else if (op == '+' && (flags&SIMPLE))		reginsert(cp, PLUS, ret);	else if (op == '+') {		/* Emit x+ as x(&|), where & means "self". */		next = regnode(cp, BRANCH);		/* Either */		regtail(cp, ret, next);		regtail(cp, regnode(cp, BACK), ret);	/* loop back */		regtail(cp, next, regnode(cp, BRANCH));	/* or */		regtail(cp, ret, regnode(cp, NOTHING));	/* null. */	} else if (op == '?') {		/* Emit x? as (x|) */		reginsert(cp, BRANCH, ret);		/* Either x */		regtail(cp, ret, regnode(cp, BRANCH));	/* or */		next = regnode(cp, NOTHING);		/* null. */		regtail(cp, ret, next);		regoptail(cp, ret, next);	}	cp->regparse++;	if (ISREPN(*cp->regparse))		FAIL("nested *?+");	return(ret);}/* - regatom - the lowest level * * Optimization:  gobbles an entire sequence of ordinary characters so that * it can turn them into a single node, which is smaller to store and * faster to run.  Backslashed characters are exceptions, each becoming a * separate node; the code is simpler that way and it's not worth fixing. */static char *regatom(cp, flagp)register struct comp *cp;int *flagp;{	register char *ret;	int flags;	*flagp = WORST;		/* Tentatively. */	switch (*cp->regparse++) {	case '^':		ret = regnode(cp, BOL);		break;	case '$':		ret = regnode(cp, EOL);		break;	case '.':		ret = regnode(cp, ANY);		*flagp |= HASWIDTH|SIMPLE;		break;	case '[': {		register int range;		register int rangeend;		register int c;		if (*cp->regparse == '^') {	/* Complement of range. */			ret = regnode(cp, ANYBUT);			cp->regparse++;		} else			ret = regnode(cp, ANYOF);		if ((c = *cp->regparse) == ']' || c == '-') {			regc(cp, c);			cp->regparse++;		}		while ((c = *cp->regparse++) != '\0' && c != ']') {			if (c != '-')				regc(cp, c);			else if ((c = *cp->regparse) == ']' || c == '\0')				regc(cp, '-');			else {				range = (unsigned char)*(cp->regparse-2);				rangeend = (unsigned char)c;				if (range > rangeend)					FAIL("invalid [] range");				for (range++; range <= rangeend; range++)					regc(cp, range);				cp->regparse++;			}		}		regc(cp, '\0');		if (c != ']')			FAIL("unmatched []");		*flagp |= HASWIDTH|SIMPLE;		break;		}	case '(':		ret = reg(cp, 1, &flags);		if (ret == NULL)			return(NULL);		*flagp |= flags&(HASWIDTH|SPSTART);		break;	case '\0':	case '|':	case ')':		/* supposed to be caught earlier */		FAIL("internal error: \\0|) unexpected");		break;	case '?':	case '+':	case '*':		FAIL("?+* follows nothing");		break;	case '\\':		if (*cp->regparse == '\0')			FAIL("trailing \\");		ret = regnode(cp, EXACTLY);		regc(cp, *cp->regparse++);		regc(cp, '\0');		*flagp |= HASWIDTH|SIMPLE;		break;	default: {		register size_t len;		register char ender;		cp->regparse--;		len = strcspn(cp->regparse, META);		if (len == 0)			FAIL("internal error: strcspn 0");		ender = *(cp->regparse+len);		if (len > 1 && ISREPN(ender))			len--;		/* Back off clear of ?+* operand. */		*flagp |= HASWIDTH;		if (len == 1)			*flagp |= SIMPLE;		ret = regnode(cp, EXACTLY);		for (; len > 0; len--)			regc(cp, *cp->regparse++);		regc(cp, '\0');		break;		}	}	return(ret);}/* - regnode - emit a node */static char *			/* Location. */regnode(cp, op)register struct comp *cp;char op;{	register char *const ret = cp->regcode;	register char *ptr;	if (!EMITTING(cp)) {		cp->regsize += 3;		return(ret);	}	ptr = ret;	*ptr++ = op;	*ptr++ = '\0';		/* Null next pointer. */	*ptr++ = '\0';	cp->regcode = ptr;	return(ret);}/* - regc - emit (if appropriate) a byte of code */static voidregc(cp, b)register struct comp *cp;char b;{	if (EMITTING(cp))		*cp->regcode++ = b;	else		cp->regsize++;}/* - reginsert - insert an operator in front of already-emitted operand * * Means relocating the operand. */static voidreginsert(cp, op, opnd)register struct comp *cp;char op;char *opnd;{	register char *place;	if (!EMITTING(cp)) {		cp->regsize += 3;		return;	}	(void) memmove(opnd+3, opnd, (size_t)(cp->regcode - opnd));	cp->regcode += 3;	place = opnd;		/* Op node, where operand used to be. */	*place++ = op;	*place++ = '\0';	*place++ = '\0';}/* - regtail - set the next-pointer at the end of a node chain */static voidregtail(cp, p, val)register struct comp *cp;char *p;char *val;{	register char *scan;	register char *temp;	register int offset;	if (!EMITTING(cp))		return;	/* Find last node. */	for (scan = p; (temp = regnext(scan)) != NULL; scan = temp)		continue;	offset = (OP(scan) == BACK) ? scan - val : val - scan;	*(scan+1) = (offset>>8)&0177;	*(scan+2) = offset&0377;}/* - regoptail - regtail on operand of first argument; nop if operandless */static voidregoptail(cp, p, val)register struct comp *cp;char *p;char *val;{	/* "Operandless" and "op != BRANCH" are synonymous in practice. */	if (!EMITTING(cp) || OP(p) != BRANCH)		return;	regtail(cp, OPERAND(p), val);}/* * sqd_regexec and friends *//* * Work-variable struct for sqd_regexec(). */struct exec {	char *reginput;		/* String-input pointer. */	char *regbol;		/* Beginning of input, for ^ check. */	char **regstartp;	/* Pointer to startp array. */	char **regendp;		/* Ditto for endp. */};/* * Forwards. */static int regtry(struct exec *ep, sqd_regexp *rp, char *string);static int regmatch(struct exec *ep, char *prog);static size_t regrepeat(struct exec *ep, char *node);#ifdef DEBUGint regnarrate = 0;void regdump();static char *regprop();#endif/* - sqd_regexec - match a regexp against a string */intsqd_regexec(prog, str)register sqd_regexp *prog;const char *str;{	register char *string = (char *)str;	/* avert const poisoning */	register char *s;	struct exec ex;	/* Be paranoid. */	if (prog == NULL || string == NULL) {		sqd_regerror("NULL argument to sqd_regexec");		return(0);	}	/* Check validity of program. */	if ((unsigned char)*prog->program != SQD_REGMAGIC) {		sqd_regerror("corrupted regexp");		return(0);	}	/* If there is a "must appear" string, look for it. */	if (prog->regmust != NULL && strstr(string, prog->regmust) == NULL)		return(0);	/* Mark beginning of line for ^ . */	ex.regbol = string;	ex.regstartp = prog->startp;	ex.regendp = prog->endp;	/* Simplest case:  anchored match need be tried only once. */	if (prog->reganch)		return(regtry(&ex, prog, string));	/* Messy cases:  unanchored match. */	if (prog->regstart != '\0') {		/* We know what char it must start with. */		for (s = string; s != NULL; s = strchr(s+1, prog->regstart))			if (regtry(&ex, prog, s))				return(1);		return(0);	} else {		/* We don't -- general case. */		for (s = string; !regtry(&ex, prog, s); s++)			if (*s == '\0')				return(0);		return(1);	}	/* NOTREACHED */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -