⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 wordres.cpp

📁 计算机英汉机器翻译系统中的英语词性标注方法实现
💻 CPP
📖 第 1 页 / 共 4 页
字号:
				strcpy(Word,"nine");
				pDictNode2=g_objLexSearch.SearchWord(Word,nSearchMod);
				pDictNode1=CreateOrdinalWord(pDictNode2,inputword);
				return pDictNode1;
			}
			else if(strcmp(Word,"twelf")==0 )
			{
				strcpy(Word,"twelve");
				pDictNode2=g_objLexSearch.SearchWord(Word,nSearchMod);
				pDictNode1=CreateOrdinalWord(pDictNode2,inputword);
				return pDictNode1;
			}
	
/*
			for(int j=0; j<WordLen; j++)
			{
				if(!isdigit(Word[j]))
					break;
			}

			for(int j=0; j<WordLen-2; j++)
			{
				if(!isdigit(Word[j]))
					break;
			}
			if(j == WordLen-2)
			{
				strcpy(temp,"第");
				strcat(temp,Word);
				Mystrcpy(pDictNode1,temp,2);
				Mystrcpy(pDictNode1,"dig",3);
			}
*/
		}
		strcpy(Word,inputword);
		p=Word+ WordLen-4;
		strcpy(tail,p);
		if(strcmp(tail,"ieth")==0)
		{// >20序数词
			p[0]='y';
			p[1]='\0';
			if((pDictNode2=g_objLexSearch.SearchWord(Word,nSearchMod)) != NULL)
			{
				if(SearchCate(pDictNode2,Cate_number))
				{
					pDictNode1=CreateOrdinalWord(pDictNode2,inputword);
					return pDictNode1;
				}
			}
		}
		pDictNode1=CreateNewWord(inputword);		//生词
	}         
	return	pDictNode1;
}

// Reports whether the first WordLen characters of Word form a digit string.
//
// Returns TRUE when
//   * every one of the WordLen characters is a decimal digit (a WordLen of
//     zero or less also yields TRUE, as it always has), or
//   * one or more digits are followed by exactly one of the two-letter
//     suffixes "st", "nd", "rd" or "th" (e.g. "1st", "22nd", "3rd", "10th").
// Returns FALSE for everything else, including a bare suffix such as "th"
// that has no leading digit.
BOOL	IsDigitalWord(char	*Word,int WordLen)
{
	int nDigits = 0;

	// isdigit() must not receive a negative value; a plain char holding one
	// byte of a double-byte character would be negative, so cast first.
	while (nDigits < WordLen && isdigit((unsigned char)Word[nDigits]))
		nDigits++;

	if (nDigits >= WordLen)
		return	TRUE;		// nothing but digits

	// A suffix only counts directly after at least one digit, and only when
	// it makes up the final two characters of the word.
	if (nDigits == 0 || WordLen - nDigits != 2)
		return	FALSE;

	// Compare the two characters that actually follow the digits rather than
	// searching the whole string, so the answer never depends on bytes that
	// lie beyond WordLen.
	const char *pSuffix = Word + nDigits;
	if (strncmp(pSuffix,"st",2) == 0 || strncmp(pSuffix,"nd",2) == 0 ||
		strncmp(pSuffix,"rd",2) == 0 || strncmp(pSuffix,"th",2) == 0)
		return	TRUE;

	return	FALSE;
}

// Reports whether Word is exactly (case-sensitively) one of the period-less
// abbreviations "Mr", "Ms", "Mrs", "Dr", "Prof", "sb" or "sth".
// Returns TRUE on a match and FALSE otherwise.
BOOL	IsAbreivWord(char *Word)
{
	static const char * const s_apszAbbrev[] =
	{
		"Mr", "Ms", "Mrs", "Dr", "Prof", "sb", "sth"
	};
	const int nCount = (int)(sizeof(s_apszAbbrev) / sizeof(s_apszAbbrev[0]));

	for (int k = 0; k < nCount; k++)
	{
		if (strcmp(s_apszAbbrev[k],Word) == 0)
			return	TRUE;
	}
	return	FALSE;
}


// Reports whether Word is exactly (case-sensitively) one of the forms of
// address "Mr.", "Ms.", "Mrs.", "Dr.", "Prof." or "Miss".
// Returns TRUE on a match and FALSE otherwise.
BOOL isAppellation(char	*Word)
{
	// NULL-terminated so the walk below needs no separate element count.
	static const char * const s_apszTitle[] =
	{
		"Mr.", "Ms.", "Mrs.", "Dr.", "Prof.", "Miss", NULL
	};

	const char * const *ppTitle = s_apszTitle;
	while (*ppTitle != NULL)
	{
		if (strcmp(*ppTitle,Word) == 0)
			return	TRUE;
		++ppTitle;
	}
	return	FALSE;
}

// Maps a word tail to its Tail_* code.
// Tail is compared case-sensitively against "s", "d", "re", "ll", "ve", "m"
// and "t"; the matching Tail_s / Tail_d / Tail_re / Tail_ll / Tail_ve /
// Tail_m / Tail_t value is returned, or -1 when none of them matches.
int	IsWordTail(char *Tail)
{
	static const struct
	{
		const char *pszText;	// tail text to match
		int			nCode;		// value returned for that text
	} s_aTail[] =
	{
		{ "s",  Tail_s  },
		{ "d",  Tail_d  },
		{ "re", Tail_re },
		{ "ll", Tail_ll },
		{ "ve", Tail_ve },
		{ "m",  Tail_m  },
		{ "t",  Tail_t  }
	};
	const int nCount = (int)(sizeof(s_aTail) / sizeof(s_aTail[0]));

	for (int k = 0; k < nCount; k++)
	{
		if (strcmp(s_aTail[k].pszText,Tail) == 0)
			return	s_aTail[k].nCode;
	}
	return	-1;
}

// Allocates a DictNode for a word and fills it with default values.
//
// Word is copied into both m_pszEnglish and m_pszOrig (each a separate
// new[] buffer of strlen(Word)+1 bytes).  The node gets no Chinese entries
// (m_nChinNum = 0, m_pFirstChin = NULL) and no neighbours in the word list.
// Mystrcpy(node, tag, 3) is called with "sym" for a one-character word and
// "n" for anything else; Mystrcpy is defined elsewhere -- presumably it
// records the tag as the node's category (TODO confirm).
// Returns the node, which was allocated with new.
DictNode* CreateNewWord(char * Word)
{
	DictNode* pNewNode = new DictNode;
	const int nWordLen = strlen(Word);

	pNewNode->m_nLxhCate = NULL;
	if (nWordLen == 1)
	{
		Mystrcpy(pNewNode,"sym",3);
	}
	else
	{
		Mystrcpy(pNewNode,"n",3);
	}

	// English text and original form are both the word itself.
	pNewNode->m_pszEnglish = new char[nWordLen+1];
	pNewNode->m_pszOrig = new char[nWordLen+1];
	strcpy(pNewNode->m_pszEnglish,Word);
	strcpy(pNewNode->m_pszOrig,Word);
	pNewNode->m_nEnglishLen = nWordLen;
	pNewNode->m_nOrigLen = nWordLen;
	pNewNode->m_bIsExistOrig = TRUE;

	// Form, number and phrase-length defaults.
	pNewNode->m_byDictForm = STYLE_ORIG;
	pNewNode->m_byDictNumber = Sing_Number;
	pNewNode->m_byMaxPhraseLen = DefaMaxPhraseLen;
	pNewNode->m_nWordStyle = 0;

	// Not linked into any word list yet.
	pNewNode->m_pNextWordNode = NULL;
	pNewNode->m_pPrevWordNode = NULL;

	// No ambiguity data and no Chinese entries.
	pNewNode->m_bIsExistAmbig = 0;
	pNewNode->m_pszAmbig = NULL;
	pNewNode->m_nAmbigLen = 0;
	pNewNode->m_nChinNum = 0;
	pNewNode->m_pFirstChin = NULL;

	return	pNewNode;
}

//cdq add begin
DictNode* CreateComposeWord(DictNode *pDictNode2,
							DictNode *pDictNode3,
							char * ComposeWord)
{//生成合成词
	DictNode* pDictNode;
	pDictNode = new DictNode;
	char *p;
	int nNewLen;

	int i;
	i=strlen(ComposeWord);
	pDictNode->m_pszEnglish = new char[i+1];
	pDictNode->m_pszOrig = new char[i+1]; 

	pDictNode->m_nLxhCate = NULL;

	strcpy(pDictNode->m_pszEnglish,ComposeWord);
	strcpy(pDictNode->m_pszOrig,ComposeWord);
	pDictNode->m_byDictForm = STYLE_ORIG;
	pDictNode->m_byDictNumber = NULL;
	pDictNode->m_byMaxPhraseLen = DefaMaxPhraseLen;
	pDictNode->m_pNextWordNode = NULL;
	pDictNode->m_pPrevWordNode = NULL;

	pDictNode->m_nEnglishLen = i;
	pDictNode->m_bIsExistOrig = TRUE;
	pDictNode->m_nOrigLen = i;
	pDictNode->m_nWordStyle = 0;
	pDictNode->m_bIsExistAmbig = 0;
	pDictNode->m_pszAmbig = NULL;
	pDictNode->m_nAmbigLen = 0;
	pDictNode->m_nChinNum = 1;


	pDictNode->m_pFirstChin = new DictChin;

	if ( SearchCate(pDictNode2,Cate_number) &&
		 SearchCate(pDictNode3,Cate_number) &&
		 ((p=strstr(pDictNode3->m_pFirstChin->m_pszChinese,"第"))==NULL) )
	{		
		strcat(pDictNode2->m_pFirstChin->m_pszChinese,pDictNode3->m_pFirstChin->m_pszChinese);
		nNewLen=strlen(pDictNode2->m_pFirstChin->m_pszChinese);
					
		pDictNode->m_pFirstChin->m_pszChinese=new char[nNewLen+2];
		strcpy(pDictNode->m_pFirstChin->m_pszChinese,
				pDictNode2->m_pFirstChin->m_pszChinese);
		pDictNode->m_pFirstChin->m_nChineseLen=
				strlen(pDictNode->m_pFirstChin->m_pszChinese);

		pDictNode->m_pFirstChin->m_nCate=Cate_number;
	}
	else if ( SearchCate(pDictNode2,Cate_number) &&
		 SearchCate(pDictNode3,Cate_number) &&
		 ((p=strstr(pDictNode3->m_pFirstChin->m_pszChinese,"第"))!=NULL) )
	{
		pDictNode3->m_pFirstChin->m_pszChinese=p+2;
		strcat(pDictNode3->m_pFirstChin->m_pszChinese,"分之");
		strcat(pDictNode3->m_pFirstChin->m_pszChinese,
			pDictNode2->m_pFirstChin->m_pszChinese);
		nNewLen=strlen(pDictNode3->m_pFirstChin->m_pszChinese);
					
		pDictNode->m_pFirstChin->m_pszChinese=new char[nNewLen+2];
		strcpy(pDictNode->m_pFirstChin->m_pszChinese,
				pDictNode3->m_pFirstChin->m_pszChinese);
		pDictNode->m_pFirstChin->m_nChineseLen=
				strlen(pDictNode->m_pFirstChin->m_pszChinese);

		pDictNode->m_pFirstChin->m_nCate=Cate_number;
	}
	else if (SearchCate(pDictNode2,Cate_adj))
	{
		while ( pDictNode2->m_pFirstChin->m_nCate != Cate_adj )
		{
			pDictNode2->m_pFirstChin=pDictNode2->m_pFirstChin->m_pNextChin;
		}
		if ( (p=strchr(pDictNode2->m_pFirstChin->m_pszChinese,';'))!=NULL )
		{
			pDictNode2->m_pFirstChin->m_nChineseLen=
					p-(pDictNode2->m_pFirstChin->m_pszChinese);					
			p[0]='\0';
		}	
		if ( strstr(ComposeWord,"ed")!=0 )
		{
			if (SearchCate(pDictNode3,Cate_n))
			{
				while ( pDictNode3->m_pFirstChin->m_nCate != Cate_n )
				{
					pDictNode3->m_pFirstChin=pDictNode3->m_pFirstChin->m_pNextChin;
				}
			}
		}
		if ( (p=strchr(pDictNode3->m_pFirstChin->m_pszChinese,';'))!=NULL )
		{
			pDictNode3->m_pFirstChin->m_nChineseLen=
					p-(pDictNode3->m_pFirstChin->m_pszChinese);
			p[0]='\0';
		}
		strcat(pDictNode2->m_pFirstChin->m_pszChinese,
			pDictNode3->m_pFirstChin->m_pszChinese);
		nNewLen=pDictNode2->m_pFirstChin->m_nChineseLen
					+pDictNode3->m_pFirstChin->m_nChineseLen;
		pDictNode->m_pFirstChin->m_pszChinese=new char[nNewLen+2];
		strcpy(pDictNode->m_pFirstChin->m_pszChinese,
				pDictNode2->m_pFirstChin->m_pszChinese);
		pDictNode->m_pFirstChin->m_nChineseLen=
				strlen(pDictNode->m_pFirstChin->m_pszChinese);

		if (  strstr(ComposeWord,"ed")!=0 ||
				pDictNode3->m_byDictForm==STYLE_ED ||
				pDictNode3->m_byDictForm==STYLE_ED1 ||
				pDictNode3->m_byDictForm==STYLE_ED2 ||
				pDictNode3->m_byDictForm==STYLE_ING )
			pDictNode->m_pFirstChin->m_nCate=Cate_adj;
		else if ( SearchCate(pDictNode3,Cate_n) )
			pDictNode->m_pFirstChin->m_nCate=Cate_n;
		else if ( SearchCate(pDictNode3,Cate_v) )
			pDictNode->m_pFirstChin->m_nCate=Cate_v;
		else
			pDictNode->m_pFirstChin->m_nCate=Cate_n;
	}
	else if (SearchCate(pDictNode2,Cate_adv))
	{
		while ( pDictNode2->m_pFirstChin->m_nCate != Cate_adv )
		{
			pDictNode2->m_pFirstChin=pDictNode2->m_pFirstChin->m_pNextChin;
		}
		if ( (p=strchr(pDictNode2->m_pFirstChin->m_pszChinese,';'))!=NULL )
		{
			pDictNode2->m_pFirstChin->m_nChineseLen=
					p-(pDictNode2->m_pFirstChin->m_pszChinese);					
			p[0]='\0';
		}	
		if ( (p=strchr(pDictNode3->m_pFirstChin->m_pszChinese,';'))!=NULL )
		{
			pDictNode3->m_pFirstChin->m_nChineseLen=
					p-(pDictNode3->m_pFirstChin->m_pszChinese);
			p[0]='\0';
		}
		strcat(pDictNode2->m_pFirstChin->m_pszChinese,
			pDictNode3->m_pFirstChin->m_pszChinese);
		nNewLen=pDictNode2->m_pFirstChin->m_nChineseLen
					+pDictNode3->m_pFirstChin->m_nChineseLen;
		pDictNode->m_pFirstChin->m_pszChinese=new char[nNewLen+2];
		strcpy(pDictNode->m_pFirstChin->m_pszChinese,
				pDictNode2->m_pFirstChin->m_pszChinese);
		pDictNode->m_pFirstChin->m_nChineseLen=
				strlen(pDictNode->m_pFirstChin->m_pszChinese);


		if( pDictNode3->m_byDictForm==STYLE_ED ||
				pDictNode3->m_byDictForm==STYLE_ED1 ||
				pDictNode3->m_byDictForm==STYLE_ED2 ||
				pDictNode3->m_byDictForm==STYLE_ING )
			pDictNode->m_pFirstChin->m_nCate=Cate_adj;
		else
			pDictNode->m_pFirstChin->m_nCate=Cate_n;
	}
	else if	(SearchCate(pDictNode2,Cate_n))
	{
		while ( pDictNode2->m_pFirstChin->m_nCate != Cate_n )
		{
			pDictNode2->m_pFirstChin=pDictNode2->m_pFirstChin->m_pNextChin;
		}
		if ( (p=strchr(pDictNode2->m_pFirstChin->m_pszChinese,';'))!=NULL )
		{
			pDictNode2->m_pFirstChin->m_nChineseLen=
					p-(pDictNode2->m_pFirstChin->m_pszChinese);					
			p[0]='\0';
		}	
		if ( (p=strchr(pDictNode3->m_pFirstChin->m_pszChinese,';'))!=NULL )
		{
			pDictNode3->m_pFirstChin->m_nChineseLen=
					p-(pDictNode3->m_pFirstChin->m_pszChinese);
			p[0]='\0';
		}
		strcat(pDictNode2->m_pFirstChin->m_pszChinese,
			pDictNode3->m_pFirstChin->m_pszChinese);
		nNewLen=pDictNode2->m_pFirstChin->m_nChineseLen
					+pDictNode3->m_pFirstChin->m_nChineseLen;
		pDictNode->m_pFirstChin->m_pszChinese=new char[nNewLen+2];
		strcpy(pDictNode->m_pFirstChin->m_pszChinese,
				pDictNode2->m_pFirstChin->m_pszChinese);
		pDictNode->m_pFirstChin->m_nChineseLen=
				strlen(pDictNode->m_pFirstChin->m_pszChinese);


		if( SearchCate(pDictNode3,Cate_adj) ||
			pDictNode3->m_byDictForm==STYLE_ED ||
			pDictNode3->m_byDictForm==STYLE_ED1 ||
			pDictNode3->m_byDictForm==STYLE_ED2 ||
				pDictNode3->m_byDictForm==STYLE_ING )
			pDictNode->m_pFirstChin->m_nCate=Cate_adj;
		else if ( SearchCate(pDictNode3,Cate_v) )  
			pDictNode->m_pFirstChin->m_nCate=Cate_v;
		else
			pDictNode->m_pFirstChin->m_nCate=Cate_n;
	}
	else if	(SearchCate(pDictNode2,Cate_v))
	{
		while ( pDictNode2->m_pFirstChin->m_nCate <= Cate_vd || 
			pDictNode2->m_pFirstChin->m_nCate >= Cate_vt )
		{
			pDictNode2->m_pFirstChin=pDictNode2->m_pFirstChin->m_pNextChin;
		}
		if ( (p=strchr(pDictNode2->m_pFirstChin->m_pszChinese,';'))!=NULL )
		{
			pDictNode2->m_pFirstChin->m_nChineseLen=
					p-(pDictNode2->m_pFirstChin->m_pszChinese);					
			p[0]='\0';
		}	
		if ( (p=strchr(pDictNode3->m_pFirstChin->m_pszChinese,';'))!=NULL )
		{
			pDictNode3->m_pFirstChin->m_nChineseLen=
					p-(pDictNode3->m_pFirstChin->m_pszChinese);
			p[0]='\0';
		}
		strcat(pDictNode2->m_pFirstChin->m_pszChinese,
			pDictNode3->m_pFirstChin->m_pszChinese);
		nNewLen=pDictNode2->m_pFirstChin->m_nChineseLen
					+pDictNode3->m_pFirstChin->m_nChineseLen;
		pDictNode->m_pFirstChin->m_pszChinese=new char[nNewLen+2];
		strcpy(pDictNode->m_pFirstChin->m_pszChinese,
				pDictNode2->m_pFirstChin->m_pszChinese);
		pDictNode->m_pFirstChin->m_nChineseLen=
				strlen(pDictNode->m_pFirstChin->m_pszChinese);

		pDictNode->m_pFirstChin->m_nCate=Cate_n;
	}
	else
	{
		if ( (p=strchr(pDictNode2->m_pFirstChin->m_pszChinese,';'))!=NULL )
		{
			pDictNode2->m_pFirstChin->m_nChineseLen=
					p-(pDictNode2->m_pFirstChin->m_pszChinese);					
			p[0]='\0';
		}	
		if ( (p=strchr(pDictNode3->m_pFirstChin->m_pszChinese,';'))!=NULL )
		{
			pDictNode3->m_pFirstChin->m_nChineseLen=
					p-(pDictNode3->m_pFirstChin->m_pszChinese);
			p[0]='\0';
		}
		strcat(pDictNode2->m_pFirstChin->m_pszChinese,
			pDictNode3->m_pFirstChin->m_pszChinese);
		nNewLen=pDictNode2->m_pFirstChin->m_nChineseLen
					+pDictNode3->m_pFirstChin->m_nChineseLen;
		pDictNode->m_pFirstChin->m_pszChinese=new char[nNewLen+2];
		strcpy(pDictNode->m_pFirstChin->m_pszChinese,
				pDictNode2->m_pFirstChin->m_pszChinese);
		pDictNode->m_pFirstChin->m_nChineseLen=
				strlen(pDictNode->m_pFirstChin->m_pszChinese);

		pDictNode->m_pFirstChin->m_nCate=Cate_n;
	}


	return pDictNode;

}

DictNode* CreateOrdinalWord(DictNode *pDictNode2,char *Word)
{
	DictNode* pDictNode;
	char temp[150];
	int	i;
	pDictNode = new DictNode;
	i = strlen(Word);
	pDictNode->m_nLxhCate = NULL;

	pDictNode->m_pszEnglish = new char[i+1];
	pDictNode->m_pszOrig = new char[i+1]; 
	
	strcpy(pDictNode->m_pszEnglish,Word);
	strcpy(pDictNode->m_pszOrig,Word);
	pDictNode->m_byDictForm = STYLE_ORIG;
	pDictNode->m_byDictNumber = Sing_Number;
	pDictNode->m_byMaxPhraseLen = DefaMaxPhraseLen;
	pDictNode->m_pNextWordNode = NULL;
	pDictNode->m_pPrevWordNode = NULL;

	pDictNode->m_nEnglishLen = i;
	pDictNode->m_bIsExistOrig = TRUE;
	pDictNode->m_nOrigLen = i;
	pDictNode->m_nWordStyle = 0;
	pDictNode->m_bIsExistAmbig = 0;
	pDictNode->m_pszAmbig = NULL;
	pDictNode->m_nAmbigLen = 0;
	pDictNode->m_nChinNum = 0;

	strcpy(temp,"第");
	strcat(temp,pDictNode2->m_pFirstChin->m_pszChinese);
	pDictNode->m_pFirstChin = new DictChin;
	i = strlen(temp);

	pDictNode->m_pFirstChin->m_pszChinese=new char[i+1];
	strcpy(pDictNode->m_pFirstChin->m_pszChinese,temp);
	pDictNode->m_pFirstChin->m_nChineseLen=
			strlen(pDictNode->m_pFirstChin->m_pszChinese);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -