⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pgpregexp.c

📁 vc环境下的pgp源码
💻 C
📖 第 1 页 / 共 3 页
字号:
	char *next;
	int flags;

	ret = regatom(rcs, &flags);
	if (ret == NULL)
		return(NULL);

	op = *rcs->regparse;
	if (!ISMULT(op)) {
		*flagp = flags;
		return(ret);
	}

	if (!(flags&HASWIDTH) && op != '?')
		FAIL("*+ operand could be empty");
	*flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH);

	if (op == '*' && (flags&SIMPLE))
		reginsert(rcs, STAR, ret);
	else if (op == '*') {
		/* Emit x* as (x&|), where & means "self". */
		reginsert(rcs, BRANCH, ret);			/* Either x */
		regoptail(ret, regnode(rcs, BACK));		/* and loop */
		regoptail(ret, ret);				/* back */
		regtail(ret, regnode(rcs, BRANCH));		/* or */
		regtail(ret, regnode(rcs, NOTHING));	/* null. */
	} else if (op == '+' && (flags&SIMPLE))
		reginsert(rcs, PLUS, ret);
	else if (op == '+') {
		/* Emit x+ as x(&|), where & means "self". */
		next = regnode(rcs, BRANCH);			/* Either */
		regtail(ret, next);
		regtail(regnode(rcs, BACK), ret);		/* loop back */
		regtail(next, regnode(rcs, BRANCH));		/* or */
		regtail(ret, regnode(rcs, NOTHING));		/* null. */
	} else if (op == '?') {
		/* Emit x? as (x|) */
		reginsert(rcs, BRANCH, ret);			/* Either x */
		regtail(ret, regnode(rcs, BRANCH));		/* or */
		next = regnode(rcs, NOTHING);		/* null. */
		regtail(ret, next);
		regoptail(ret, next);
	}
	rcs->regparse++;
	if (ISMULT(*rcs->regparse))
		FAIL("nested *?+");

	return(ret);
}

/*
 - regatom - the lowest level
 *
 * Consumes one atom starting at rcs->regparse, emits the node(s) for it
 * through regnode()/regc(), and returns the location regnode() gave for the
 * atom's first node.  *flagp starts out as WORST and has HASWIDTH, SIMPLE
 * and/or SPSTART or'ed in depending on which kind of atom was parsed.
 *
 * A run of ordinary characters is swallowed whole and emitted as a single
 * EXACTLY node.  A backslashed character always gets an EXACTLY node of its
 * own.
 *
 * Error paths leave through FAIL, whose definition for this half of the file
 * is outside this view (presumably it reports the message and returns NULL --
 * confirm).  A NULL result from reg() is passed straight back.
 */
static char *
regatom(regcompState *rcs, int *flagp)
{
	char *node;
	int subflags;

	*flagp = WORST;		/* Pessimistic default; cases below add bits. */

	switch (*rcs->regparse++) {
	case '^':
		node = regnode(rcs, BOL);
		break;
	case '$':
		node = regnode(rcs, EOL);
		break;
	case '.':
		node = regnode(rcs, ANY);
		*flagp |= HASWIDTH|SIMPLE;
		break;
	case '[': {
			int lo;
			int hi;

			/* A leading ^ selects the complemented class. */
			if (*rcs->regparse != '^')
				node = regnode(rcs, ANYOF);
			else {
				node = regnode(rcs, ANYBUT);
				rcs->regparse++;
			}

			/* ] or - in first position is an ordinary member. */
			if (*rcs->regparse == ']' || *rcs->regparse == '-')
				regc(rcs, *rcs->regparse++);

			for (;;) {
				if (*rcs->regparse == '\0' || *rcs->regparse == ']')
					break;

				if (*rcs->regparse != '-') {
					/* Plain member. */
					regc(rcs, *rcs->regparse++);
					continue;
				}

				rcs->regparse++;	/* Step over the '-'. */
				if (*rcs->regparse == ']' || *rcs->regparse == '\0') {
					/* Trailing '-' stands for itself. */
					regc(rcs, '-');
					continue;
				}

				/*
				 * The character before the '-' has already been
				 * emitted, so the range starts one past it.
				 */
				lo = UCHARAT(rcs->regparse-2)+1;
				hi = UCHARAT(rcs->regparse);
				if (lo > hi+1) {
					FAIL("invalid [] range");
				}
				while (lo <= hi) {
					regc(rcs, (char)lo);
					lo++;
				}
				rcs->regparse++;
			}

			regc(rcs, '\0');	/* Terminate the member list. */
			if (*rcs->regparse != ']') {
				FAIL("unmatched []");
			}
			rcs->regparse++;
			*flagp |= HASWIDTH|SIMPLE;
		}
		break;
	case '(':
		node = reg(rcs, 1, &subflags);
		if (node == NULL)
			return(NULL);
		*flagp |= subflags&(HASWIDTH|SPSTART);
		break;
	case '\0':
	case '|':
	case ')':
		FAIL("internal urp");	/* Callers should never let these through. */
		break;
	case '?':
	case '+':
	case '*':
		FAIL("?+* follows nothing");
		break;
	case '\\':
		if (*rcs->regparse == '\0') {
			FAIL("trailing \\");
		}
		/* The escaped character becomes a one-byte literal. */
		node = regnode(rcs, EXACTLY);
		regc(rcs, *rcs->regparse++);
		regc(rcs, '\0');
		*flagp |= HASWIDTH|SIMPLE;
		break;
	default: {
			int run;
			char after;

			rcs->regparse--;	/* Put back the char the switch consumed. */
			run = strcspn_(rcs->regparse, META);
			if (run <= 0) {
				FAIL("internal disaster");
			}
			after = rcs->regparse[run];
			if (run > 1 && ISMULT(after))
				run--;		/* Leave the last char as the ?+* operand. */
			*flagp |= (run == 1) ? (HASWIDTH|SIMPLE) : HASWIDTH;
			node = regnode(rcs, EXACTLY);
			for (; run > 0; run--)
				regc(rcs, *rcs->regparse++);
			regc(rcs, '\0');
		}
		break;
	}

	return(node);
}

/*
 - regnode - emit a node
 *
 * Writes a three-byte node at rcs->regcode -- the opcode followed by two
 * zero bytes for the "next" field -- advances rcs->regcode past it, and
 * returns where the node was written.
 *
 * While rcs->regcode points at regdummy nothing is stored; rcs->regsize is
 * increased by the three bytes instead and &regdummy is returned.
 */
static char *			/* Location. */
regnode(regcompState *rcs, char op)
{
	char *node = rcs->regcode;

	if (node == &regdummy) {
		rcs->regsize += 3;
		return(node);
	}

	node[0] = op;
	node[1] = '\0';		/* Null "next" pointer, high byte... */
	node[2] = '\0';		/* ...and low byte. */
	rcs->regcode = node + 3;

	return(node);
}

/*
 - regc - emit (if appropriate) a byte of code
 *
 * Stores b at rcs->regcode and advances it.  While rcs->regcode points at
 * regdummy the byte is only counted in rcs->regsize.
 */
static void
regc(regcompState *rcs, char b)
{
	if (rcs->regcode == &regdummy) {
		rcs->regsize++;
		return;
	}

	*rcs->regcode = b;
	rcs->regcode++;
}

/*
 - reginsert - insert an operator in front of already-emitted operand
 *
 * Everything from opnd up to the current rcs->regcode is shifted three
 * bytes towards higher addresses, and a fresh node (op plus a zeroed "next"
 * field) is written where the operand used to begin.  rcs->regcode grows by
 * three.  While rcs->regcode points at regdummy only rcs->regsize is
 * increased.
 */
static void
reginsert(regcompState *rcs, char op, char *opnd)
{
	char *cur;

	if (rcs->regcode == &regdummy) {
		rcs->regsize += 3;
		return;
	}

	cur = rcs->regcode;
	rcs->regcode = cur + 3;

	/* Copy from the top down so overlapping bytes are not clobbered. */
	while (cur > opnd) {
		cur--;
		cur[3] = cur[0];
	}

	/* Op node, where operand used to be. */
	opnd[0] = op;
	opnd[1] = '\0';
	opnd[2] = '\0';
}

/*
 - regtail - set the next-pointer at the end of a node chain
 *
 * Follows regnext() from p until it returns NULL, then stores the distance
 * between that last node and val in the node's two "next" bytes (high byte
 * first).  For a BACK node the distance is measured from val up to the
 * node; for every other opcode it is measured from the node up to val.
 * Does nothing when p is &regdummy.
 */
static void
regtail(char *p, char const *val)
{
	char *last;
	char *following;
	int dist;

	if (p == &regdummy)
		return;

	/* Walk to the final node of the chain. */
	last = p;
	while ((following = regnext(last)) != NULL)
		last = following;

	dist = (OP(last) == BACK) ? (int)(last - val) : (int)(val - last);
	last[1] = (dist>>8)&0377;
	last[2] = dist&0377;
}

/*
 - regoptail - regtail on operand of first argument; nop if operandless
 *
 * Only BRANCH nodes are acted on; a NULL p, &regdummy, or any other opcode
 * is silently ignored.
 */
static void
regoptail(char *p, char const *val)
{
	/* "Has an operand" and "op == BRANCH" are synonymous in practice. */
	if (p != NULL && p != &regdummy && OP(p) == BRANCH)
		regtail(OPERAND(p), val);
}

/*
 * regexec and friends
 */

/*
 * Working state for one pgpRegExec() call.  pgpRegExec() keeps an instance
 * on its stack, clears it, and hands a pointer to regtry(), which passes it
 * on to regmatch().
 */
typedef struct regexecState {
	char const *reginput;		/* Input position; regtry() sets it to the try's start and reads it back as the match end. */
	char const *regbol;			/* Start of the whole input string (set in pgpRegExec()), for ^ check. */
	char const **regstartp;		/* Set by regtry() to prog->startp. */
	char const **regendp;		/* Set by regtry() to prog->endp. */
} regexecState;

/*
 * FAIL is redefined for the code that follows: it passes the message to
 * pgpDebugMsg() and makes the enclosing function return
 * kPGPError_BadParams.  The expansion is a bare brace block rather than a
 * do { } while (0), so "if (c) FAIL(m); else ..." will not compile -- wrap
 * the call in braces when an else follows.
 */
#undef FAIL
#define	FAIL(m)	{ pgpDebugMsg(m); return(kPGPError_BadParams); }


/*
 * Forward declarations of the file-local matching helpers.  Each takes the
 * per-call regexecState as its first argument.
 */
static int regtry(regexecState *res, regexp *prog, char const *string);
static int regmatch(regexecState *res, char const *prog);
static int regrepeat(regexecState *res, char const *p);


#if REGEXP_DEBUG
/* Debug-only declarations; compiled in only when REGEXP_DEBUG is non-zero. */
int regnarrate = 0;		/* When non-zero, regmatch() writes trace output to stderr. */
void regdump();			/* Defined outside this view. */
static char *regprop();	/* Supplies the text regmatch() prints for a node. */
#endif

/*
 - pgpRegExec - match a regexp against a string
 *
 * Returns 1 when some position in string matches prog and 0 when none does.
 * For an anchored program (prog->reganch set) the result of the single
 * regtry() at the start of string is returned as-is.
 *
 * A NULL argument, or a program whose first byte is not MAGIC, leaves
 * through FAIL and therefore returns kPGPError_BadParams.
 * NOTE(review): callers that treat the result as a plain boolean should
 * check how that error value is interpreted -- confirm against callers.
 *
 * On a successful match regtry() has filled in prog->startp[] / prog->endp[].
 */
int
pgpRegExec(regexp *prog, char const *string)
{
	regexecState state;
	char const *cursor;

	/* Be paranoid... */
	if (prog == NULL || string == NULL) {
		FAIL("NULL parameter");
	}

	/* Check validity of program. */
	if (UCHARAT(prog->program) != MAGIC) {
		FAIL("corrupted program");
	}

	pgpClearMemory( &state, sizeof(state) );

	/*
	 * If there is a "must appear" string, give up immediately when the
	 * input does not contain it anywhere.
	 */
	if (prog->regmust != NULL) {
		for (cursor = strchr(string, prog->regmust[0]);
		     cursor != NULL;
		     cursor = strchr(cursor + 1, prog->regmust[0])) {
			if (strncmp(cursor, prog->regmust, prog->regmlen) == 0)
				break;	/* Found it. */
		}
		if (cursor == NULL)	/* Not present. */
			return(0);
	}

	/* Mark beginning of line for ^ . */
	state.regbol = string;

	/* Simplest case:  anchored match need be tried only once. */
	if (prog->reganch)
		return(regtry(&state, prog, string));

	/* Messy cases:  unanchored match. */
	if (prog->regstart != '\0') {
		/* Known first character: try only where it occurs. */
		for (cursor = strchr(string, prog->regstart);
		     cursor != NULL;
		     cursor = strchr(cursor + 1, prog->regstart)) {
			if (regtry(&state, prog, cursor))
				return(1);
		}
	} else {
		/* General case: try every offset, the terminator included. */
		for (cursor = string; ; cursor++) {
			if (regtry(&state, prog, cursor))
				return(1);
			if (*cursor == '\0')
				break;
		}
	}

	/* Failure. */
	return(0);
}

/*
 - regtry - try match at specific point
 *
 * Points the exec state at string and at prog's startp/endp arrays, resets
 * all NSUBEXP entries of both arrays to NULL, and runs regmatch() on the
 * program just past its first byte.  On success entry 0 of startp/endp is
 * set to the start of this attempt and to the input position regmatch()
 * stopped at.
 */
static int			/* 0 failure, 1 success */
regtry(regexecState *res, regexp *prog, char const *string)
{
	int idx;

	res->reginput = string;
	res->regstartp = prog->startp;
	res->regendp = prog->endp;

	/* Forget whatever an earlier attempt recorded. */
	for (idx = 0; idx < NSUBEXP; idx++) {
		prog->startp[idx] = NULL;
		prog->endp[idx] = NULL;
	}

	if (!regmatch(res, prog->program + 1))
		return(0);

	prog->startp[0] = string;
	prog->endp[0] = res->reginput;
	return(1);
}

/*
 - regmatch - main matching routine
 *
 * Conceptually the strategy is simple:  check to see whether the current
 * node matches, call self recursively to see whether the rest matches,
 * and then act accordingly.  In practice we make some effort to avoid
 * recursion, in particular by going through "ordinary" nodes (that don't
 * need to know whether the rest of the match failed) by a loop instead of
 * by recursion.
 */
static int			/* 0 failure, 1 success */
regmatch(regexecState *res, char const *prog)
{
	char const *scan;	/* Current node. */
	char const *next;		/* Next node. */

	scan = prog;
#if REGEXP_DEBUG
	if (scan != NULL && regnarrate)
		fprintf(stderr, "%s(\n", regprop(scan));
#endif
	while (scan != NULL) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -