📄 regexp.cpp

📁 检测语法的Edit类
💻 CPP
📖 第 1 页 / 共 2 页
字号:
上一页 12
	(void) memmove(opnd+3, opnd, (size_t)((regcode - opnd)*sizeof(TCHAR)));
	regcode += 3;

	place = opnd;		// Op node, where operand used to be. 
	*place++ = op;
	*place++ = _T('\0');
	*place++ = _T('\0');
}

//
// regtail - set the next-pointer at the end of a node chain
 
void CRegExp::regtail(TCHAR *p, TCHAR *val)
{
	TCHAR *scan;
	TCHAR *temp;
//	int offset;

	if (!bEmitCode)
		return;

	// Find last node. 
	for (scan = p; (temp = regnext(scan)) != NULL; scan = temp)
		continue;

	*((short *)(scan+1)) = (OP(scan) == BACK) ? scan - val : val - scan;
}


// regoptail - regtail on operand of first argument; nop if operandless
 
void CRegExp::regoptail(TCHAR *p, TCHAR *val)
{
	// "Operandless" and "op != BRANCH" are synonymous in practice. 
	if (!bEmitCode || OP(p) != BRANCH)
		return;
	regtail(OPERAND(p), val);
}


// RegFind	- match a regexp against a string
// Returns	- Returns position of regexp or -1
//			  if regular expression not found
// Note		- The regular expression should have been
//			  previously compiled using RegComp
int CRegExp::RegFind(const TCHAR *str)
{
	TCHAR *string = (TCHAR *)str;	// avert const poisoning 
	TCHAR *s;

	// Delete any previously stored found string
	delete sFoundText;
	sFoundText = NULL;

	// Be paranoid. 
	if(string == NULL) 
	{
		TRACE0("NULL argument to regexec\n");
		return(-1);
	}

	// Check validity of regex
	if (!bCompiled) 
	{
		TRACE0("No regular expression provided yet.\n");
		return(-1);
	}

	// If there is a "must appear" string, look for it. 
	if (regmust != NULL && _tcsstr(string, regmust) == NULL)
		return(-1);

	// Mark beginning of line for ^
	regbol = string;

	// Simplest case:  anchored match need be tried only once. 
	if (reganch)
	{
		if( regtry(string) )
		{
			// Save the found substring in case we need it
			sFoundText = new TCHAR[GetFindLen()+1];
			sFoundText[GetFindLen()] = _T('\0');
			_tcsncpy(sFoundText, string, GetFindLen() );

			return 0;
		}
		//String not found
		return -1;
	}

	// Messy cases:  unanchored match. 
	if (regstart != _T('\0')) 
	{
		// We know what TCHAR it must start with. 
		for (s = string; s != NULL; s = _tcschr(s+1, regstart))
			if (regtry(s))
			{
				int nPos = s-str;

				// Save the found substring in case we need it later
				sFoundText = new TCHAR[GetFindLen()+1];
				sFoundText[GetFindLen()] = _T('\0');
				_tcsncpy(sFoundText, s, GetFindLen() );

				return nPos;
			}
		return -1;
	} 
	else 
	{
		// We don't -- general case
		for (s = string; !regtry(s); s++)
			if (*s == _T('\0'))
				return(-1);

		int nPos = s-str;

		// Save the found substring in case we need it later
		sFoundText = new TCHAR[GetFindLen()+1];
		sFoundText[GetFindLen()] = _T('\0');
		_tcsncpy(sFoundText, s, GetFindLen() );

		return nPos;
	}
	// NOTREACHED 
}


// regtry - try match at specific point
 
int	CRegExp::regtry(TCHAR *string)
{
	int i;
	TCHAR **stp;
	TCHAR **enp;

	reginput = string;

	stp = startp;
	enp = endp;
	for (i = NSUBEXP; i > 0; i--) 
	{
		*stp++ = NULL;
		*enp++ = NULL;
	}
	if (regmatch(program)) 
	{
		startp[0] = string;
		endp[0] = reginput;
		return(1);
	} 
	else
		return(0);
}

// regmatch - main matching routine
//
// Conceptually the strategy is simple:  check to see whether the current
// node matches, call self recursively to see whether the rest matches,
// and then act accordingly.  In practice we make some effort to avoid
// recursion, in particular by going through "ordinary" nodes (that don't
// need to know whether the rest of the match failed) by a loop instead of
// by recursion.
 
int	CRegExp::regmatch(TCHAR *prog)
{
	TCHAR *scan;	// Current node. 
	TCHAR *next;		// Next node. 

	for (scan = prog; scan != NULL; scan = next) {
		next = regnext(scan);

		switch (OP(scan)) {
		case BOL:
			if (reginput != regbol)
				return(0);
			break;
		case EOL:
			if (*reginput != _T('\0'))
				return(0);
			break;
		case ANY:
			if (*reginput == _T('\0'))
				return(0);
			reginput++;
			break;
		case EXACTLY: {
			size_t len;
			TCHAR *const opnd = OPERAND(scan);

			// Inline the first character, for speed. 
			if (*opnd != *reginput)
				return(0);
			len = _tcslen(opnd);
			if (len > 1 && _tcsncmp(opnd, reginput, len) != 0)
				return(0);
			reginput += len;
			break;
			}
		case ANYOF:
			if (*reginput == _T('\0') ||
					_tcschr(OPERAND(scan), *reginput) == NULL)
				return(0);
			reginput++;
			break;
		case ANYBUT:
			if (*reginput == _T('\0') ||
					_tcschr(OPERAND(scan), *reginput) != NULL)
				return(0);
			reginput++;
			break;
		case NOTHING:
			break;
		case BACK:
			break;
		case OPEN+1: case OPEN+2: case OPEN+3:
		case OPEN+4: case OPEN+5: case OPEN+6:
		case OPEN+7: case OPEN+8: case OPEN+9: {
			const int no = OP(scan) - OPEN;
			TCHAR *const input = reginput;

			if (regmatch(next)) {
				// Don't set startp if some later
				// invocation of the same parentheses
				// already has.
				 
				if (startp[no] == NULL)
					startp[no] = input;
				return(1);
			} else
				return(0);
			break;
			}
		case CLOSE+1: case CLOSE+2: case CLOSE+3:
		case CLOSE+4: case CLOSE+5: case CLOSE+6:
		case CLOSE+7: case CLOSE+8: case CLOSE+9: {
			const int no = OP(scan) - CLOSE;
			TCHAR *const input = reginput;

			if (regmatch(next)) {
				// Don't set endp if some later
				// invocation of the same parentheses
				// already has.
				 
				if (endp[no] == NULL)
					endp[no] = input;
				return(1);
			} else
				return(0);
			break;
			}
		case BRANCH: {
			TCHAR *const save = reginput;

			if (OP(next) != BRANCH)		// No choice. 
				next = OPERAND(scan);	// Avoid recursion. 
			else {
				while (OP(scan) == BRANCH) {
					if (regmatch(OPERAND(scan)))
						return(1);
					reginput = save;
					scan = regnext(scan);
				}
				return(0);
				// NOTREACHED 
			}
			break;
			}
		case STAR: 
		case PLUS: {
			const TCHAR nextch =
				(OP(next) == EXACTLY) ? *OPERAND(next) : _T('\0');
			size_t no;
			TCHAR *const save = reginput;
			const size_t min = (OP(scan) == STAR) ? 0 : 1;

			for (no = regrepeat(OPERAND(scan)) + 1; no > min; no--) {
				reginput = save + no - 1;
				// If it could work, try it. 
				if (nextch == _T('\0') || *reginput == nextch)
					if (regmatch(next))
						return(1);
			}
			return(0);
			break;
			}
		case END:
			return(1);	// Success! 
			break;
		default:
			TRACE0("regexp corruption\n");
			return(0);
			break;
		}
	}

	// We get here only if there's trouble -- normally "case END" is
	// the terminating point.
	 
	TRACE0("corrupted pointers\n");
	return(0);
}


// regrepeat - report how many times something simple would match
 
size_t CRegExp::regrepeat(TCHAR *node)
{
	size_t count;
	TCHAR *scan;
	TCHAR ch;

	switch (OP(node)) 
	{
	case ANY:
		return(_tcslen(reginput));
		break;
	case EXACTLY:
		ch = *OPERAND(node);
		count = 0;
		for (scan = reginput; *scan == ch; scan++)
			count++;
		return(count);
		break;
	case ANYOF:
		return(_tcsspn(reginput, OPERAND(node)));
		break;
	case ANYBUT:
		return(_tcscspn(reginput, OPERAND(node)));
		break;
	default:		// Oh dear.  Called inappropriately. 
		TRACE0("internal error: bad call of regrepeat\n");
		return(0);	// Best compromise. 
		break;
	}
	// NOTREACHED 
}

// regnext - dig the "next" pointer out of a node
 
TCHAR *CRegExp::regnext(TCHAR *p)
{
	const short &offset = *((short*)(p+1));

	if (offset == 0)
		return(NULL);

	return((OP(p) == BACK) ? p-offset : p+offset);
}

// GetReplaceString	- Converts a replace expression to a string
// Returns			- Pointer to newly allocated string
//					  Caller is responsible for deleting it
TCHAR* CRegExp::GetReplaceString( const TCHAR* sReplaceExp )
{
	TCHAR *src = (TCHAR *)sReplaceExp;
	TCHAR *buf;
	TCHAR c;
	int no;
	size_t len;

	if( sReplaceExp == NULL || sFoundText == NULL )
		return NULL;


	// First compute the length of the string
	int replacelen = 0;
	while ((c = *src++) != _T('\0')) 
	{
		if (c == _T('&'))
			no = 0;
		else if (c == _T('\\') && isdigit(*src))
			no = *src++ - _T('0');
		else
			no = -1;

		if (no < 0) 
		{	
			// Ordinary character. 
			if (c == _T('\\') && (*src == _T('\\') || *src == _T('&')))
				c = *src++;
			replacelen++;
		} 
		else if (startp[no] != NULL && endp[no] != NULL &&
					endp[no] > startp[no]) 
		{
			// Get tagged expression
			len = endp[no] - startp[no];
			replacelen += len;
		}
	}

	// Now allocate buf
	buf = new TCHAR[replacelen+1];
	if( buf == NULL )
		return NULL;

	TCHAR* sReplaceStr = buf;

	// Add null termination
	buf[replacelen] = _T('\0');
	
	// Now we can create the string
	src = (TCHAR *)sReplaceExp;
	while ((c = *src++) != _T('\0')) 
	{
		if (c == _T('&'))
			no = 0;
		else if (c == _T('\\') && isdigit(*src))
			no = *src++ - _T('0');
		else
			no = -1;

		if (no < 0) 
		{	
			// Ordinary character. 
			if (c == _T('\\') && (*src == _T('\\') || *src == _T('&')))
				c = *src++;
			*buf++ = c;
		} 
		else if (startp[no] != NULL && endp[no] != NULL &&
					endp[no] > startp[no]) 
		{
			// Get tagged expression
			len = endp[no] - startp[no];
			int tagpos = startp[no] - startp[0];

			_tcsncpy(buf, sFoundText + tagpos, len);
			buf += len;
		}
	}

	return sReplaceStr;
}

int RegSearchFind(CRegExp& r, CStringEx& string, LPCTSTR sSearchExp, long startPos, long& len)
{
	int nPos = 0;
	LPTSTR str = (LPTSTR)(LPCTSTR)string + startPos;

	r.RegComp(sSearchExp);

	if ((nPos = r.RegFind((LPTSTR)str)) != -1) {
		int offset = str-(LPCTSTR)string+nPos;
		len = r.GetFindLen();
		return nPos + startPos;
	}
	return -1;
}

int RegSearchReplaceAll( CStringEx& string, LPCTSTR sSearchExp, 
					 LPCTSTR sReplaceExp )
{
	int nPos = 0;
	int nReplaced = 0;
	CRegExp r;
	LPTSTR str = (LPTSTR)(LPCTSTR)string;

	r.RegComp( sSearchExp );
	while( (nPos = r.RegFind((LPTSTR)str)) != -1 )
	{
		nReplaced++;
		TCHAR *pReplaceStr = r.GetReplaceString( sReplaceExp );

		int offset = str-(LPCTSTR)string+nPos;
		string.Replace( offset, r.GetFindLen(), 
				pReplaceStr );

		// Replace might have caused a reallocation
		str = (LPTSTR)(LPCTSTR)string + offset + _tcslen(pReplaceStr);
		delete pReplaceStr;
	}
	return nReplaced;
}
上一页 12
💿 文件大小 172 K
👤 上传用户 zjwbt
📂 所属分类编辑器/阅读器
🏷️ 相关标签

#Edit #检测
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -