⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 liblexic.cpp

📁 lex语法分析
💻 CPP
📖 第 1 页 / 共 3 页
字号:
//	}
//	else {
//		return (unsigned int) CTagConst::tagUNKNOWN;
//	}
//}
// Looks up strTag through CTagConst::GetTagNo and hands the result back
// as an unsigned int.
unsigned int CLibLexic::ReplaceTagStrByInt(string strTag)
{
	const unsigned int nTagNo = CTagConst::GetTagNo(strTag);
	return nTagNo;
}
// A word is malformed as soon as it contains any character that is not
// one of '-', '.', '\'', a space (octal 040) or an ASCII letter.
// Returns true for malformed words, false otherwise (including "").
bool CLibLexic::IsMalform(string strWord)
{
	const string::size_type nLen = strWord.length();
	for (string::size_type pos = 0; pos < nLen; ++pos) {
		const char ch = strWord[pos];
		const bool bLetter = (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
		const bool bAllowedMark = (ch == '-') || (ch == '.') || (ch == '\'') || (ch == '\040');
		if (!bLetter && !bAllowedMark) {
			// First offending character decides the answer.
			return true;
		}
	}
	return false;
}


// Decides whether a word is "verbose" (redundant). A word is verbose when
// any of the following holds (checked on the word after ToLower, i.e.
// the comparison is meant to be case-insensitive):
//   1. the same character occurs three or more times in a row ("sooo");
//   2. the same two-character sequence occurs three or more times
//      back-to-back ("hehehe");
//   3. the word contains more than three '.' characters.
// Returns true when the word is verbose, false otherwise (including "").
//
// NOTE(review): ToLower is defined elsewhere in the project; it is assumed
// to lower-case its argument in place -- confirm.
bool CLibLexic::IsVerbose(string strWord)
{
	ToLower(strWord);

	const string::size_type len = strWord.length();
	string::size_type i;

	// Rule 1: a run of three or more identical characters.
	int run = 1;
	for (i = 1; i < len; i++) {
		if (strWord[i] == strWord[i-1]) {
			run++;
			if (run > 2) return true;
		}
		else {
			run = 1;
		}
	}

	// Rule 2: a two-character sequence repeated three or more times in a
	// row. Six characters have the shape "xyxyxy" exactly when four
	// consecutive positions each equal the character two places before
	// them, so counting such consecutive matches is sufficient. This
	// replaces a hand-written state machine that kept the wrong character
	// pair after a mismatch and therefore both missed real repeats
	// (e.g. "hehahaha") and reported phantom ones (e.g. "hehxexex").
	int matches = 0;
	for (i = 2; i < len; i++) {
		if (strWord[i] == strWord[i-2]) {
			matches++;
			if (matches >= 4) return true;
		}
		else {
			matches = 0;
		}
	}

	// Rule 3: more than three dots anywhere in the word.
	int dots = 0;
	for (i = 0; i < len; i++) {
		if (strWord[i] == '.') {
			dots++;
			if (dots > 3) return true;
		}
	}
	return false;
}

// Matches a word against the supported patterns.
// Delegates to FitPatternEx; when that succeeds the resulting pattern in
// strPattern is passed through ToLower before returning true.
// On failure strPattern is left exactly as FitPatternEx produced it.
// (Recording the pattern in m_vecPattern is currently disabled.)
bool CLibLexic::FitPattern(string strWord,string& strPattern)
{
	if (!FitPatternEx(strWord, strPattern)) {
		return false;
	}
	ToLower(strPattern);
	return true;
}
// Splits strWord at every '-' and matches each segment with
// FitPatternExEx, which appends the segment's pattern to strPattern.
// A "-" is appended to strPattern after each segment that is followed by
// a hyphen. Segments may be empty (leading, trailing or doubled hyphen)
// and are passed on as-is.
// Returns true when every segment matched; otherwise strPattern is
// emptied and false is returned.
bool CLibLexic::FitPatternEx(string strWord,string& strPattern)
{
	string strSegment;
	const string::size_type nLen = strWord.length();

	for (string::size_type pos = 0; pos < nLen; ++pos) {
		const char ch = strWord[pos];
		if (ch != '-') {
			strSegment += ch;
			continue;
		}
		// Hyphen reached: the collected segment must match on its own.
		if (!FitPatternExEx(strSegment, strPattern)) {
			strPattern.erase();
			return false;
		}
		strSegment.erase();
		strPattern += "-";
	}

	// Whatever follows the last hyphen (or the whole word) is matched last.
	if (!FitPatternExEx(strSegment, strPattern)) {
		strPattern.erase();
		return false;
	}
	return true;
}
// Matches one hyphen-free segment against the pattern recognizers, in this
// fixed order: PatternNumeral, PatternAlphas, PatternAD, PatternDA,
// PatternIS, PatternLSR (PatternDD is intentionally not consulted).
// The first recognizer that accepts the segment decides what is appended
// to strPattern: the recognizer's pattern text, or the segment itself for
// PatternAlphas. Returns false, appending nothing here, when none accepts.
bool CLibLexic::FitPatternExEx(string strTemp,string& strPattern)
{
	string strTempPat;

	if (PatternNumeral(strTemp, strTempPat)) {
		strPattern += strTempPat;
		return true;
	}
	if (PatternAlphas(strTemp)) {
		// Pure letter segments stand for themselves.
		strPattern += strTemp;
		return true;
	}
	if (   PatternAD(strTemp, strTempPat)
	    || PatternDA(strTemp, strTempPat)
	    || PatternIS(strTemp, strTempPat)
	    || PatternLSR(strTemp, strTempPat))
	{
		strPattern += strTempPat;
		return true;
	}
	return false;
}
// Recognizers for strings written like numbers,
// e.g. 100,323.0234   32.34   23,542,235
//
// NumeralInteger: true when every character of strWord is a decimal digit.
// Because the check is "no non-digit found", an empty string also yields
// true.
bool CLibLexic::NumeralInteger(string strWord)
{
	const string::size_type nLen = strWord.length();
	for (string::size_type pos = 0; pos < nLen; ++pos) {
		const char ch = strWord[pos];
		if (ch < '0' || ch > '9') {
			return false;
		}
	}
	return true;

}
// Recognizes an unsigned decimal number without thousands separators:
//     digit+                 e.g. 32
//     digit* '.' digit+      e.g. 32.34 or .32
// Anything else -- including "", a lone ".", or a trailing '.' as in
// "32." -- is rejected.
bool CLibLexic::NumeralFloat(string strWord)
{
	const string::size_type nLen = strWord.length();
	string::size_type pos = 0;

	// Integer part; may be empty when a fraction follows.
	while (pos < nLen && strWord[pos] >= '0' && strWord[pos] <= '9') {
		++pos;
	}
	if (pos == nLen) {
		// Nothing but digits: valid only if there was at least one.
		return pos > 0;
	}
	if (strWord[pos] != '.') {
		return false;
	}
	++pos;

	// Fraction: one or more digits running to the end of the string.
	const string::size_type nFracStart = pos;
	while (pos < nLen && strWord[pos] >= '0' && strWord[pos] <= '9') {
		++pos;
	}
	return pos == nLen && pos > nFracStart;
}
// Recognizes an unsigned number whose integer part may contain commas:
//     digit+ ( ',' digit+ )* ( '.' digit+ )?
// e.g. 23,542,235 or 100,323.0234. Group sizes between commas are not
// checked; a comma or dot must always be followed by at least one digit,
// and no comma may appear after the dot. "" is rejected.
bool CLibLexic::NumeralDigits(string strWord)
{
	const string::size_type nLen = strWord.length();
	string::size_type pos = 0;

	// Integer part: non-empty digit groups separated by single commas.
	for (;;) {
		const string::size_type nGroupStart = pos;
		while (pos < nLen && strWord[pos] >= '0' && strWord[pos] <= '9') {
			++pos;
		}
		if (pos == nGroupStart) {
			// Empty group: string is empty, starts with a non-digit,
			// or a comma is not followed by a digit.
			return false;
		}
		if (pos < nLen && strWord[pos] == ',') {
			++pos;
			continue;
		}
		break;
	}

	if (pos == nLen) {
		return true;
	}
	if (strWord[pos] != '.') {
		return false;
	}
	++pos;

	// Fraction: one or more digits running to the end of the string.
	const string::size_type nFracStart = pos;
	while (pos < nLen && strWord[pos] >= '0' && strWord[pos] <= '9') {
		++pos;
	}
	return pos == nLen && pos > nFracStart;
}
// Classifies a numeric string and reports the matching pattern tag:
//   {i}  integer, no commas
//   {f}  integer / real number without commas, or a pure fraction (.32)
//   {d}  integer / real number that may contain commas
// The recognizers overlap, so they must be tried in exactly this order
// (most specific first). Returns false and leaves strNumPat untouched
// when strNum matches none of them.
bool CLibLexic::PatternNumeral(string strNum,string& strNumPat)
{
	if (NumeralInteger(strNum)) {
		strNumPat = "{i}";
		return true;
	}
	if (NumeralFloat(strNum)) {
		strNumPat = "{f}";
		return true;
	}
	if (NumeralDigits(strNum)) {
		strNumPat = "{d}";
		return true;
	}
	return false;
}
//  all letters followed by '.'
//  all letters
// Returns true when str consists only of ASCII letters, optionally
// followed by a single trailing '.'. The check is vacuous for "" and for
// a lone ".", which therefore also yield true.
bool CLibLexic::PatternAlphas(string str)
{ 
	string::size_type len = str.length();
	// Only inspect the last character when there is one: indexing
	// str[len-1] on an empty string reads outside the string.
	if (len > 0 && '.' == str[len-1])
	{
		len = len - 1;
	}
	for (string::size_type i = 0; i < len; i++)
	{
		if( ! (   ( str[i]>='a' && str[i]<='z')
		        ||( str[i]>='A' && str[i]<='Z') ) )
		{
			return false;
		}
	}
	return true;
}
//  single letter + .  + {i}
//  single letter + {i} + . + {i}       V.4-compliant  X11.5-based
//  single letter + {i}
//  letters + {i} +  .... (and so on)
//  Note: a match that ends right after letters simply ends there; after
//  {i} + letters the alternation can continue.
//
// Recognizes "alpha + digit" words and builds their pattern: letters and
// '.' are copied literally, every run of digits is replaced by "{i}".
// On success the pattern is stored in strPattern and true is returned;
// otherwise strPattern is left untouched and false is returned.
//
// States of the scanner:
//   0  start, a letter is required
//   1  exactly one letter read so far
//   2  last character was a letter (two or more letters in this run,
//      or a letter following a digit run)
//   3  a '.' was just read, a digit is required
//   4  inside a digit run that follows letters
//   5  inside the digit run that follows a '.'
bool CLibLexic::PatternAD(string strWord,string& strPattern)
{
	string strTempPat;
	int i,state,len;
	bool bAtEndState,bNormalQuit;

	len = strWord.length();
	// bNormalQuit turns false on the first character no state accepts;
	// bAtEndState tells whether the word may legally end where we are.
	for(i=0, state=0, bAtEndState=false, bNormalQuit=true;
	i<len && bNormalQuit;
	i++) 
	{
		switch(state) {
		case 0:
			// The word has to start with a letter.
			if ( ( strWord[i]>='a' && strWord[i]<='z') || ( strWord[i]>='A' && strWord[i]<='Z')) 
			{
				strTempPat.append(1,strWord[i]);
				state = 1;
				bAtEndState = false;
			}
			else {
				bNormalQuit = false;
			}
			break;
		case 1:
			// After a single letter: more letters, a digit run, or '.'.
			if ( ( strWord[i]>='a' && strWord[i]<='z') || ( strWord[i]>='A' && strWord[i]<='Z')) 
			{
				strTempPat.append(1,strWord[i]);
				state = 2;
				bAtEndState = true;
			}
			else if (strWord[i]>='0' && strWord[i]<='9')
			{
				// Digits are not copied; "{i}" is emitted once the run ends.
				state = 4;
				bAtEndState = true;	
			}
			else if ('.'==strWord[i]) {
				// '.' directly after letters is only accepted here,
				// i.e. after exactly one letter.
				strTempPat.append(1,strWord[i]);
				state = 3;
				bAtEndState = false;
			}
			else {
				bNormalQuit = false;
			}
			break;
		case 2:
			// After a letter (not the single leading one): letters or digits.
			if ( ( strWord[i]>='a' && strWord[i]<='z') || ( strWord[i]>='A' && strWord[i]<='Z')) 
			{
				strTempPat.append(1,strWord[i]);
				state = 2;
				bAtEndState = true;
			}
			else if (strWord[i]>='0' && strWord[i]<='9')
			{
				state = 4;
				bAtEndState = true;	
			}
			else {
				bNormalQuit = false;
			}
			break;
		case 3:
			// A '.' must be followed by at least one digit.
			if (strWord[i]>='0' && strWord[i]<='9')
			{
				state = 5;
				bAtEndState = true;
			}
			else {
				bNormalQuit = false;
			}
			break;
		case 4:
			// Inside a digit run after letters. When the run is ended by a
			// letter or '.', the pending "{i}" is written before that character.
			if ( ( strWord[i]>='a' && strWord[i]<='z') || ( strWord[i]>='A' && strWord[i]<='Z')) 
			{
				strTempPat = strTempPat + "{i}";
				strTempPat.append(1,strWord[i]);
				state = 2;
				bAtEndState = true;
			}
			else if (strWord[i]>='0' && strWord[i]<='9')
			{
				state = 4;
				bAtEndState = true;	
			}
			else if ('.'==strWord[i]) {
				strTempPat = strTempPat + "{i}";
				strTempPat.append(1,strWord[i]);
				state = 3;
				bAtEndState = false;
			}
			else {
				bNormalQuit = false;
			}
			break;
		case 5:
			// Digit run after '.': only further digits may follow.
			if (strWord[i]>='0' && strWord[i]<='9')
			{
				state = 5;
				bAtEndState = true;	
			}
			else {
				bNormalQuit = false;
			}
			break;
		default:
			assert(false);
		} // switch;
	}// for
	if(bNormalQuit&&bAtEndState) {
		switch(state) {
		case 2:
			// Word ended on a letter: the pattern is already complete.
			strPattern = strTempPat;
			break;
		case 4:
		case 5:
			// Word ended inside a digit run: emit its pending "{i}".
			strPattern = strTempPat+"{i}";
			break;
		default:
			// States 0, 1 and 3 never set bAtEndState, so they cannot get here.
			assert(false);
		}
		return true;
	}
	else {
		return false;
	}
}
//  {d}% 
//  {d}  +  :{d}  +   .......(省略)
bool CLibLexic::PatternDD(string strWord,string& strPattern)
{
	string strTemp,strTempPat,strTempPatEx;
	bool bFitted;
	int i,len;

	len = strWord.length();
	

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -