regcomp.c

来自「用于嵌入式Linux系统的标准C的库函数」· C语言代码 · 共 2,090 行 · 第 1/4 页
2,090 行
	/* deal with undersized strip */	if (p->slen >= p->ssize)		enlarge(p, (p->ssize+1) / 2 * 3);	/* +50% */	assert(p->slen < p->ssize);	/* finally, it's all reduced to the easy case */	p->strip[p->slen++] = SOP(op, opnd);}/* - doinsert - insert a sop into the strip == static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos); */static voiddoinsert(p, op, opnd, pos)struct parse *p;sop op;size_t opnd;sopno pos;{	sopno sn;	sop s;	int i;	/* avoid making error situations worse */	if (p->error != 0)		return;	sn = HERE();	EMIT(op, opnd);		/* do checks, ensure space */	assert(HERE() == sn+1);	s = p->strip[sn];	/* adjust paren pointers */	assert(pos > 0);	for (i = 1; i < NPAREN; i++) {		if (p->pbegin[i] >= pos) {			p->pbegin[i]++;		}		if (p->pend[i] >= pos) {			p->pend[i]++;		}	}	memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],						(HERE()-pos-1)*sizeof(sop));	p->strip[pos] = s;}/* - dofwd - complete a forward reference == static void dofwd(struct parse *p, sopno pos, sop value); */static voiddofwd(p, pos, value)struct parse *p;sopno pos;sop value;{	/* avoid making error situations worse */	if (p->error != 0)		return;	assert(value < 1<<OPSHIFT);	p->strip[pos] = OP(p->strip[pos]) | value;}/* - enlarge - enlarge the strip == static void enlarge(struct parse *p, sopno size); */static voidenlarge(p, size)struct parse *p;sopno size;{	sop *sp;	if (p->ssize >= size)		return;	sp = (sop *)realloc(p->strip, size*sizeof(sop));	if (sp == NULL) {		SETERROR(REG_ESPACE);		return;	}	p->strip = sp;	p->ssize = size;}/* - stripsnug - compact the strip == static void stripsnug(struct parse *p, struct re_guts *g); */static voidstripsnug(p, g)struct parse *p;struct re_guts *g;{	g->nstates = p->slen;	g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));	if (g->strip == NULL) {		SETERROR(REG_ESPACE);		g->strip = p->strip;	}}/* - findmust - fill in must and mlen with longest mandatory literal string == static void findmust(struct parse *p, struct re_guts *g); * * This algorithm could do fancy things like analyzing the operands of | * for common subsequences.  Someday.  This code is simple and finds most * of the interesting cases. * * Note that must and mlen got initialized during setup. */static voidfindmust(p, g)struct parse *p;struct re_guts *g;{	sop *scan;	sop *start;	sop *newstart;	sopno newlen;	sop s;	char *cp;	sopno i;	int offset;	int cs, mccs;	/* avoid making error situations worse */	if (p->error != 0)		return;	/* Find out if we can handle OANYOF or not */	mccs = 0;	for (cs = 0; cs < g->ncsets; cs++)		if (g->sets[cs].multis != NULL)			mccs = 1;	/* find the longest OCHAR sequence in strip */	newlen = 0;	offset = 0;	g->moffset = 0;	scan = g->strip + 1;	do {		s = *scan++;		switch (OP(s)) {		case OCHAR:		/* sequence member */			if (newlen == 0)		/* new sequence */				newstart = scan - 1;			newlen++;			break;		case OPLUS_:		/* things that don't break one */		case OLPAREN:		case ORPAREN:			break;		case OQUEST_:		/* things that must be skipped */		case OCH_:			offset = altoffset(scan, offset, mccs);			scan--;			do {				scan += OPND(s);				s = *scan;				/* assert() interferes w debug printouts */				if (OP(s) != O_QUEST && OP(s) != O_CH &&							OP(s) != OOR2) {					g->iflags |= BAD;					return;				}			} while (OP(s) != O_QUEST && OP(s) != O_CH);			/* fallthrough */		case OBOW:		/* things that break a sequence */		case OEOW:		case OBOL:		case OEOL:		case O_QUEST:		case O_CH:		case OEND:			if (newlen > g->mlen) {		/* ends one */				start = newstart;				g->mlen = newlen;				if (offset > -1) {					g->moffset += offset;					offset = newlen;				} else					g->moffset = offset;			} else {				if (offset > -1)					offset += newlen;			}			newlen = 0;			break;		case OANY:			if (newlen > g->mlen) {		/* ends one */				start = newstart;				g->mlen = newlen;				if (offset > -1) {					g->moffset += offset;					offset = newlen;				} else					g->moffset = offset;			} else {				if (offset > -1)					offset += newlen;			}			if (offset > -1)				offset++;			newlen = 0;			break;		case OANYOF:		/* may or may not invalidate offset */			/* First, everything as OANY */			if (newlen > g->mlen) {		/* ends one */				start = newstart;				g->mlen = newlen;				if (offset > -1) {					g->moffset += offset;					offset = newlen;				} else					g->moffset = offset;			} else {				if (offset > -1)					offset += newlen;			}			if (offset > -1)				offset++;			newlen = 0;			/* And, now, if we found out we can't deal with			 * it, make offset = -1.			 */			if (mccs)				offset = -1;			break;		default:			/* Anything here makes it impossible or too hard			 * to calculate the offset -- so we give up;			 * save the last known good offset, in case the			 * must sequence doesn't occur later.			 */			if (newlen > g->mlen) {		/* ends one */				start = newstart;				g->mlen = newlen;				if (offset > -1)					g->moffset += offset;				else					g->moffset = offset;			}			offset = -1;			newlen = 0;			break;		}	} while (OP(s) != OEND);	if (g->mlen == 0) {		/* there isn't one */		g->moffset = -1;		return;	}	/* turn it into a character string */	g->must = malloc((size_t)g->mlen + 1);	if (g->must == NULL) {		/* argh; just forget it */		g->mlen = 0;		g->moffset = -1;		return;	}	cp = g->must;	scan = start;	for (i = g->mlen; i > 0; i--) {		while (OP(s = *scan++) != OCHAR)			continue;		assert(cp < g->must + g->mlen);		*cp++ = (char)OPND(s);	}	assert(cp == g->must + g->mlen);	*cp++ = '\0';		/* just on general principles */}/* - altoffset - choose biggest offset among multiple choices == static int altoffset(sop *scan, int offset, int mccs); * * Compute, recursively if necessary, the largest offset among multiple * re paths. */static intaltoffset(scan, offset, mccs)sop *scan;int offset;int mccs;{	int largest;	int try;	sop s;	/* If we gave up already on offsets, return */	if (offset == -1)		return -1;	largest = 0;	try = 0;	s = *scan++;	while (OP(s) != O_QUEST && OP(s) != O_CH) {		switch (OP(s)) {		case OOR1:			if (try > largest)				largest = try;			try = 0;			break;		case OQUEST_:		case OCH_:			try = altoffset(scan, try, mccs);			if (try == -1)				return -1;			scan--;			do {				scan += OPND(s);				s = *scan;				if (OP(s) != O_QUEST && OP(s) != O_CH &&							OP(s) != OOR2)					return -1;			} while (OP(s) != O_QUEST && OP(s) != O_CH);			/* We must skip to the next position, or we'll			 * leave altoffset() too early.			 */			scan++;			break;		case OANYOF:			if (mccs)				return -1;		case OCHAR:		case OANY:			try++;		case OBOW:		case OEOW:		case OLPAREN:		case ORPAREN:		case OOR2:			break;		default:			try = -1;			break;		}		if (try == -1)			return -1;		s = *scan++;	}	if (try > largest)		largest = try;	return largest+offset;}/* - computejumps - compute char jumps for BM scan == static void computejumps(struct parse *p, struct re_guts *g); * * This algorithm assumes g->must exists and is has size greater than * zero. It's based on the algorithm found on Computer Algorithms by * Sara Baase. * * A char jump is the number of characters one needs to jump based on * the value of the character from the text that was mismatched. */static voidcomputejumps(p, g)struct parse *p;struct re_guts *g;{	int ch;	int mindex;	/* Avoid making errors worse */	if (p->error != 0)		return;	g->charjump = (int*) malloc((NC + 1) * sizeof(int));	if (g->charjump == NULL)	/* Not a fatal error */		return;	/* Adjust for signed chars, if necessary */	g->charjump = &g->charjump[-(CHAR_MIN)];	/* If the character does not exist in the pattern, the jump	 * is equal to the number of characters in the pattern.	 */	for (ch = CHAR_MIN; ch < (CHAR_MAX + 1); ch++)		g->charjump[ch] = g->mlen;	/* If the character does exist, compute the jump that would	 * take us to the last character in the pattern equal to it	 * (notice that we match right to left, so that last character	 * is the first one that would be matched).	 */	for (mindex = 0; mindex < g->mlen; mindex++)		g->charjump[g->must[mindex]] = g->mlen - mindex - 1;}/* - computematchjumps - compute match jumps for BM scan == static void computematchjumps(struct parse *p, struct re_guts *g); * * This algorithm assumes g->must exists and is has size greater than * zero. It's based on the algorithm found on Computer Algorithms by * Sara Baase. * * A match jump is the number of characters one needs to advance based * on the already-matched suffix. * Notice that all values here are minus (g->mlen-1), because of the way * the search algorithm works. */static voidcomputematchjumps(p, g)struct parse *p;struct re_guts *g;{	int mindex;		/* General "must" iterator */	int suffix;		/* Keeps track of matching suffix */	int ssuffix;		/* Keeps track of suffixes' suffix */	int* pmatches;		/* pmatches[k] points to the next i				 * such that i+1...mlen is a substring				 * of k+1...k+mlen-i-1				 */	/* Avoid making errors worse */	if (p->error != 0)		return;	pmatches = (int*) malloc(g->mlen * sizeof(unsigned int));	if (pmatches == NULL) {		g->matchjump = NULL;		return;	}	g->matchjump = (int*) malloc(g->mlen * sizeof(unsigned int));	if (g->matchjump == NULL)	/* Not a fatal error */		return;	/* Set maximum possible jump for each character in the pattern */	for (mindex = 0; mindex < g->mlen; mindex++)		g->matchjump[mindex] = 2*g->mlen - mindex - 1;	/* Compute pmatches[] */	for (mindex = g->mlen - 1, suffix = g->mlen; mindex >= 0;	    mindex--, suffix--) {		pmatches[mindex] = suffix;		/* If a mismatch is found, interrupting the substring,		 * compute the matchjump for that position. If no		 * mismatch is found, then a text substring mismatched		 * against the suffix will also mismatch against the		 * substring.		 */		while (suffix < g->mlen		    && g->must[mindex] != g->must[suffix]) {			g->matchjump[suffix] = MIN(g->matchjump[suffix],			    g->mlen - mindex - 1);			suffix = pmatches[suffix];		}	}	/* Compute the matchjump up to the last substring found to jump	 * to the beginning of the largest must pattern prefix matching	 * it's own suffix.	 */	for (mindex = 0; mindex <= suffix; mindex++)		g->matchjump[mindex] = MIN(g->matchjump[mindex],		    g->mlen + suffix - mindex);        ssuffix = pmatches[suffix];        while (suffix < g->mlen) {                while (suffix <= ssuffix && suffix < g->mlen) {                        g->matchjump[suffix] = MIN(g->matchjump[suffix],			    g->mlen + ssuffix - suffix);                        suffix++;                }		if (suffix < g->mlen)                	ssuffix = pmatches[ssuffix];        }	free(pmatches);}/* - pluscount - count + nesting == static sopno pluscount(struct parse *p, struct re_guts *g); */static sopno			/* nesting depth */pluscount(p, g)struct parse *p;struct re_guts *g;{	sop *scan;	sop s;	sopno plusnest = 0;	sopno maxnest = 0;	if (p->error != 0)		return(0);	/* there may not be an OEND */	scan = g->strip + 1;	do {		s = *scan++;		switch (OP(s)) {		case OPLUS_:			plusnest++;			break;		case O_PLUS:			if (plusnest > maxnest)				maxnest = plusnest;			plusnest--;			break;		}	} while (OP(s) != OEND);	if (plusnest != 0)		g->iflags |= BAD;	return(maxnest);}
regcomp.c - 源码说明

本页面展示了「用于嵌入式Linux系统的标准C的库函数」中的 regcomp.c 源码文件，采用 C语言编程语言编写，共 2,090 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与Linux相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?