📄 regexp.cpp

📁 Crimson编辑器的英文版,完成从韩文版变成英文版的移植,并且附带可执行文件和注册表文件,无需原先的安装包,是改写编辑器的最理想选择.
💻 CPP
📖 第 1 页 / 共 2 页
字号:
上一页 12
void CRegExp::reginsert(TCHAR op, TCHAR *opnd)
{
	TCHAR *place;

	if (!bEmitCode) {
		regsize += 3;
		return;
	}

	(void) memmove(opnd+3, opnd, (size_t)((regcode - opnd)*sizeof(TCHAR)));
	regcode += 3;

	place = opnd;		// Op node, where operand used to be.
	*place++ = op;
	*place++ = _T('\0');
	*place++ = _T('\0');
}

//
// regtail - set the next-pointer at the end of a node chain

void CRegExp::regtail(TCHAR *p, TCHAR *val)
{
	TCHAR *scan;
	TCHAR *temp;
//	int offset;

	if (!bEmitCode)
		return;

	// Find last node.
	for (scan = p; (temp = regnext(scan)) != NULL; scan = temp)
		continue;

	*((short *)(scan+1)) = (OP(scan) == BACK) ? scan - val : val - scan;
}


// regoptail - regtail on operand of first argument; nop if operandless

void CRegExp::regoptail(TCHAR *p, TCHAR *val)
{
	// "Operandless" and "op != BRANCH" are synonymous in practice.
	if (!bEmitCode || OP(p) != BRANCH)
		return;
	regtail(OPERAND(p), val);
}


// RegFind	- match a regexp against a string
// Returns	- Returns position of regexp or -1
//			  if regular expression not found
// Note		- The regular expression should have been
//			  previously compiled using RegComp
int CRegExp::RegFind(const TCHAR *str)
{
	TCHAR *string = (TCHAR *)str;	// avert const poisoning
	TCHAR *s;

	// Delete any previously stored found string
	delete sFoundText;
	sFoundText = NULL;

	// Be paranoid.
	if(string == NULL)
	{
		TRACE0("NULL argument to regexec\n");
		return(-1);
	}

	// Check validity of regex
	if (!bCompiled)
	{
		TRACE0("No regular expression provided yet.\n");
		return(-1);
	}

	// If there is a "must appear" string, look for it.
	if (regmust != NULL && _tcsstr(string, regmust) == NULL)
		return(-1);

	// Mark beginning of line for ^
	regbol = string;

	// Simplest case:  anchored match need be tried only once.
	if (reganch)
	{
		if( regtry(string) )
		{
			// Save the found substring in case we need it
			sFoundText = new TCHAR[GetFoundLength()+1];
			sFoundText[GetFoundLength()] = _T('\0');
			_tcsncpy(sFoundText, string, GetFoundLength() );

			return 0;
		}
		//String not found
		return -1;
	}

	// Messy cases:  unanchored match.
	if (regstart != _T('\0'))
	{
		// We know what TCHAR it must start with.
		for (s = string; s != NULL; s = _tcschr(s+1, regstart))
			if (regtry(s))
			{
				int nPos = s-str;

				// Save the found substring in case we need it later
				sFoundText = new TCHAR[GetFoundLength()+1];
				sFoundText[GetFoundLength()] = _T('\0');
				_tcsncpy(sFoundText, s, GetFoundLength() );

				return nPos;
			}
		return -1;
	}
	else
	{
		// We don't -- general case
		for (s = string; !regtry(s); s++)
			if (*s == _T('\0'))
				return(-1);

		int nPos = s-str;

		// Save the found substring in case we need it later
		sFoundText = new TCHAR[GetFoundLength()+1];
		sFoundText[GetFoundLength()] = _T('\0');
		_tcsncpy(sFoundText, s, GetFoundLength() );

		return nPos;
	}
	// NOTREACHED
}


// regtry - try match at specific point

int	CRegExp::regtry(TCHAR *string)
{
	int i;
	TCHAR **stp;
	TCHAR **enp;

	reginput = string;

	stp = startp;
	enp = endp;
	for (i = NSUBEXP; i > 0; i--)
	{
		*stp++ = NULL;
		*enp++ = NULL;
	}
	if (regmatch(program))
	{
		startp[0] = string;
		endp[0] = reginput;
		return(1);
	}
	else
		return(0);
}

// regmatch - main matching routine
//
// Conceptually the strategy is simple:  check to see whether the current
// node matches, call self recursively to see whether the rest matches,
// and then act accordingly.  In practice we make some effort to avoid
// recursion, in particular by going through "ordinary" nodes (that don't
// need to know whether the rest of the match failed) by a loop instead of
// by recursion.

int	CRegExp::regmatch(TCHAR *prog)
{
	TCHAR *scan;	// Current node.
	TCHAR *next;		// Next node.

	for (scan = prog; scan != NULL; scan = next) {
		next = regnext(scan);

		switch (OP(scan)) {
		case BOL:
			if (reginput != regbol)
				return(0);
			break;
		case EOL:
			if (*reginput != _T('\0'))
				return(0);
			break;
		case ANY:
			if (*reginput == _T('\0'))
				return(0);
			reginput++;
			break;
		case EXACTLY: {
			size_t len;
			TCHAR *const opnd = OPERAND(scan);

			// Inline the first character, for speed.
			if (*opnd != *reginput)
				return(0);
			len = _tcslen(opnd);
			if (len > 1 && _tcsncmp(opnd, reginput, len) != 0)
				return(0);
			reginput += len;
			break;
			}
		case ANYOF:
			if (*reginput == _T('\0') ||
					_tcschr(OPERAND(scan), *reginput) == NULL)
				return(0);
			reginput++;
			break;
		case ANYBUT:
			if (*reginput == _T('\0') ||
					_tcschr(OPERAND(scan), *reginput) != NULL)
				return(0);
			reginput++;
			break;
		case NOTHING:
			break;
		case BACK:
			break;
		case OPEN+1: case OPEN+2: case OPEN+3:
		case OPEN+4: case OPEN+5: case OPEN+6:
		case OPEN+7: case OPEN+8: case OPEN+9: {
			const int no = OP(scan) - OPEN;
			TCHAR *const input = reginput;

			if (regmatch(next)) {
				// Don't set startp if some later
				// invocation of the same parentheses
				// already has.

				if (startp[no] == NULL)
					startp[no] = input;
				return(1);
			} else
				return(0);
			break;
			}
		case CLOSE+1: case CLOSE+2: case CLOSE+3:
		case CLOSE+4: case CLOSE+5: case CLOSE+6:
		case CLOSE+7: case CLOSE+8: case CLOSE+9: {
			const int no = OP(scan) - CLOSE;
			TCHAR *const input = reginput;

			if (regmatch(next)) {
				// Don't set endp if some later
				// invocation of the same parentheses
				// already has.

				if (endp[no] == NULL)
					endp[no] = input;
				return(1);
			} else
				return(0);
			break;
			}
		case BRANCH: {
			TCHAR *const save = reginput;

			if (OP(next) != BRANCH)		// No choice.
				next = OPERAND(scan);	// Avoid recursion.
			else {
				while (OP(scan) == BRANCH) {
					if (regmatch(OPERAND(scan)))
						return(1);
					reginput = save;
					scan = regnext(scan);
				}
				return(0);
				// NOTREACHED
			}
			break;
			}
		case STAR:
		case PLUS: {
			const TCHAR nextch =
				(OP(next) == EXACTLY) ? *OPERAND(next) : _T('\0');
			size_t no;
			TCHAR *const save = reginput;
			const size_t min = (OP(scan) == STAR) ? 0 : 1;

			for (no = regrepeat(OPERAND(scan)) + 1; no > min; no--) {
				reginput = save + no - 1;
				// If it could work, try it.
				if (nextch == _T('\0') || *reginput == nextch)
					if (regmatch(next))
						return(1);
			}
			return(0);
			break;
			}
		case END:
			return(1);	// Success!
			break;
		default:
			TRACE0("regexp corruption\n");
			return(0);
			break;
		}
	}

	// We get here only if there's trouble -- normally "case END" is
	// the terminating point.

	TRACE0("corrupted pointers\n");
	return(0);
}


// regrepeat - report how many times something simple would match

size_t CRegExp::regrepeat(TCHAR *node)
{
	size_t count;
	TCHAR *scan;
	TCHAR ch;

	switch (OP(node))
	{
	case ANY:
		return(_tcslen(reginput));
		break;
	case EXACTLY:
		ch = *OPERAND(node);
		count = 0;
		for (scan = reginput; *scan == ch; scan++)
			count++;
		return(count);
		break;
	case ANYOF:
		return(_tcsspn(reginput, OPERAND(node)));
		break;
	case ANYBUT:
		return(_tcscspn(reginput, OPERAND(node)));
		break;
	default:		// Oh dear.  Called inappropriately.
		TRACE0("internal error: bad call of regrepeat\n");
		return(0);	// Best compromise.
		break;
	}
	// NOTREACHED
}

// regnext - dig the "next" pointer out of a node

TCHAR *CRegExp::regnext(TCHAR *p)
{
	const short &offset = *((short*)(p+1));

	if (offset == 0)
		return(NULL);

	return((OP(p) == BACK) ? p-offset : p+offset);
}

// GetReplaceString	- Converts a replace expression to a string
// Returns			- Pointer to newly allocated string
//					  Caller is responsible for deleting it
TCHAR* CRegExp::GetReplaceString( const TCHAR* sReplaceExp )
{
	TCHAR *src = (TCHAR *)sReplaceExp;
	TCHAR *buf;
	TCHAR c;
	int no;
	size_t len;

	if( sReplaceExp == NULL || sFoundText == NULL )
		return NULL;


	// First compute the length of the string
	int replacelen = 0;
	while ((c = *src++) != _T('\0'))
	{
		if (c == _T('&'))
			no = 0;
		else if (c == _T('\\') && isdigit(*src))
			no = *src++ - _T('0');
		else
			no = -1;

		if (no < 0)
		{
			// Ordinary character.
			if (c == _T('\\') && (*src == _T('\\') || *src == _T('&')))
				c = *src++;
			replacelen++;
		}
		else if (startp[no] != NULL && endp[no] != NULL &&
					endp[no] > startp[no])
		{
			// Get tagged expression
			len = endp[no] - startp[no];
			replacelen += len;
		}
	}

	// Now allocate buf
	buf = new TCHAR[replacelen+1];
	if( buf == NULL )
		return NULL;

	TCHAR* sReplaceStr = buf;

	// Add null termination
	buf[replacelen] = _T('\0');

	// Now we can create the string
	src = (TCHAR *)sReplaceExp;
	while ((c = *src++) != _T('\0'))
	{
		if (c == _T('&'))
			no = 0;
		else if (c == _T('\\') && isdigit(*src))
			no = *src++ - _T('0');
		else
			no = -1;

		if (no < 0)
		{
			// Ordinary character.
			if (c == _T('\\') && (*src == _T('\\') || *src == _T('&')))
				c = *src++;
			*buf++ = c;
		}
		else if (startp[no] != NULL && endp[no] != NULL &&
					endp[no] > startp[no])
		{
			// Get tagged expression
			len = endp[no] - startp[no];
			int tagpos = startp[no] - startp[0];

			_tcsncpy(buf, sFoundText + tagpos, len);
			buf += len;
		}
	}

	return sReplaceStr;
}

int CRegExp::GetFoundLength()
{
	if( startp[0] == NULL || endp[0] == NULL ) return 0;
	return endp[0] - startp[0];
}

BOOL CRegExp::GetFoundString(CString & szFoundString)
{
	if( startp[0] == NULL || endp[0] == NULL ) return FALSE;
	szFoundString = CString(startp[0], endp[0] - startp[0]);
	return TRUE;
}

BOOL CRegExp::GetReplaceString(LPCTSTR lpszReplaceExp, CString & szReplaceString)
{
	TCHAR * pReplaceString = GetReplaceString(lpszReplaceExp);
	if( ! pReplaceString ) return FALSE;
	szReplaceString = pReplaceString;
	delete pReplaceString;
	return TRUE;
}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -