⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 utility.cpp

📁 这是一个中科院中文词法分析器
💻 CPP
📖 第 1 页 / 共 2 页
字号:
		}
	}
	if(i<nLen)//Get middle delimiter such as .
	{
		sChar[0]=sString[i++];
		if(sChar[0]<0)//Get first char
			sChar[1]=sString[i++];
		else
			sChar[1]=0;
		if(!CC_Find("百千万亿佰仟%‰",sChar)&&sChar[0]!='%')
			i-=strlen(sChar);
	}
	if(i>=nLen)
		return true;
	return false;
}
/*********************************************************************
 *
 *  Func Name  : IsAllIndex
 *
 *  Description: Judge whether the string is made up only of index
 *               characters: a leading run of two-byte characters whose
 *               first byte is 162, followed by a run of single-byte
 *               ASCII letters (A-Z, a-z).
 *               NOTE(review): lead byte 162 is presumably the GB2312
 *               row holding enumeration/index symbols - confirm against
 *               the encoding actually used by the callers.
 *
 *  Parameters : sString: NUL-terminated string to examine
 *
 *  Returns    : true if the two runs above consume the whole string
 *               (an empty string yields true), false otherwise
 *  Author     : Kevin Zhang  
 *  History    : 
 *              1.create 2002-1-24
 *********************************************************************/
bool IsAllIndex(unsigned char *sString)
{
	unsigned int nLen=strlen((const char *)sString),i=0;

	// Two-byte index characters: only the lead byte is checked, the
	// trail byte is skipped unseen. "i+1<nLen" (instead of "i<nLen-1")
	// keeps the unsigned arithmetic from wrapping on an empty string.
	while(i+1<nLen&&sString[i]==162)
	{
		i+=2;
	}
	if(i>=nLen)
		return true;

	// Single-byte letters. The letter test is parenthesised as a whole
	// so that the bounds check guards both the upper- and lower-case
	// ranges ("&&" binds tighter than "||").
	while(i<nLen&&((sString[i]>='A'&&sString[i]<='Z')||(sString[i]>='a'&&sString[i]<='z')))
	{
		i+=1;
	}

	// Anything left over is not an index character.
	return i>=nLen;
}
/*********************************************************************
 *
 *  Func Name  : IsAllLetter
 *
 *  Description: Judge whether the string consists solely of two-byte
 *               letter characters, i.e. byte pairs whose first byte is
 *               163 and whose second byte lies in 193..218 or 225..250.
 *               NOTE(review): these ranges look like the GB2312
 *               full-width A-Z / a-z - confirm against the encoding
 *               used by the callers.
 *
 *  Parameters : sString: NUL-terminated string to examine
 *
 *  Returns    : true if every byte pair matches (an empty string
 *               yields true), false otherwise
 *  Author     : Kevin Zhang  
 *  History    : 
 *              1.create 2002-1-24
 *********************************************************************/
bool IsAllLetter(unsigned char *sString)
{
	const unsigned int nTotal=strlen((const char *)sString);
	unsigned int nPos=0;

	// Consume byte pairs for as long as a complete pair is available
	// and it encodes a letter.
	while(nPos+1<nTotal)
	{
		const unsigned char cLead=sString[nPos];
		const unsigned char cTrail=sString[nPos+1];
		const bool bFirstRange=(cTrail>=193&&cTrail<=218);
		const bool bSecondRange=(cTrail>=225&&cTrail<=250);

		if(cLead!=163||!(bFirstRange||bSecondRange))
			break;
		nPos+=2;
	}

	// Unconsumed bytes mean a non-letter (or a dangling single byte).
	return nPos>=nTotal;
}
/*********************************************************************
 *
 *  Func Name  : IsAllDelimiter
 *
 *  Description: Judge whether the string consists solely of two-byte
 *               delimiter characters, i.e. byte pairs whose first byte
 *               is 161 or 163. Only the first byte of each pair is
 *               inspected.
 *               NOTE(review): 161/163 are presumably the GB2312 symbol
 *               and full-width ASCII rows - confirm against the
 *               encoding used by the callers.
 *
 *  Parameters : sString: NUL-terminated string to examine
 *
 *  Returns    : true if every byte pair matches (an empty string
 *               yields true), false otherwise
 *  Author     : Kevin Zhang  
 *  History    : 
 *              1.create 2002-1-24
 *********************************************************************/
bool IsAllDelimiter(unsigned char *sString)
{
	const unsigned int nTotal=strlen((const char *)sString);
	unsigned int nPos=0;

	// Step over byte pairs while a complete pair remains and its lead
	// byte is one of the two accepted values.
	while(nPos+1<nTotal)
	{
		const unsigned char cLead=sString[nPos];
		if(cLead!=161&&cLead!=163)
			break;
		nPos+=2;
	}

	// Leftover bytes mean something other than a delimiter was found.
	return !(nPos<nTotal);
}
/*********************************************************************
 *
 *  Func Name  : BinarySearch
 *
 *  Description: Look up the index of nVal in the table nTable, whose
 *               length is nTableLen. The table must be sorted in
 *               ascending order for the search to be meaningful.
 *
 *  Parameters : nVal:      the value to look for
 *               nTable:    the table to search
 *               nTableLen: number of entries in nTable
 *
 *  Returns    : the index of an entry equal to nVal, or -1 if there is
 *               none (including when nTableLen <= 0)
 *  Author     : Kevin Zhang  
 *  History    : 
 *              1.create 2002-1-25
 *********************************************************************/
int BinarySearch(int nVal, int *nTable,int nTableLen)
{
	int nStart=0,nEnd=nTableLen-1;
	while(nStart<=nEnd)//Binary search
	{
		// Midpoint written as start+(end-start)/2 so that the sum of two
		// large indices can never overflow a signed int.
		int nMid=nStart+(nEnd-nStart)/2;
		if(nTable[nMid]==nVal)
		{
			return nMid;//find it
		}
		else if(nTable[nMid]<nVal)
		{
			nStart=nMid+1;
		}
		else
		{
			nEnd=nMid-1;
		}
	}
	return -1;//Can not find it;
}
/*********************************************************************
 *
 *  Func Name  : IsForeign
 *
 *  Description: Decide whether the word is not a Non-fereign word
 *
 *  Parameters : sWord: the word
 *
 *  Returns    : the index value
 *  Author     : Kevin Zhang  
 *  History    : 
 *              1.create 2002-1-26
 *********************************************************************/
bool IsForeign(char *sWord)
{
  int nForeignCount=GetForeignCharCount(sWord),nCharCount=strlen(sWord);
  if(nCharCount>2||nForeignCount>=1*nCharCount/2)
	  return true;
  return false;
}
/*********************************************************************
 *
 *  Func Name  : IsAllForeign
 *
 *  Description: Decide whether the word is not a Non-fereign word
 *
 *  Parameters : sWord: the word
 *
 *  Returns    : the index value
 *  Author     : Kevin Zhang  
 *  History    : 
 *              1.create 2002-3-25
 *********************************************************************/
bool IsAllForeign(char *sWord)
{
  unsigned int nForeignCount=(unsigned int)GetForeignCharCount(sWord);
  if(2*nForeignCount==strlen(sWord))
	  return true;
  return false;
}
/*********************************************************************
 *
 *  Func Name  : IsAllChineseNum
 *
 *  Description: Decide whether the word is a Chinese number word. The
 *               word is walked two bytes at a time; every two-byte
 *               unit must be found in the number char set, except that
 *               the very first unit may instead be one of the prefix
 *               chars, and the four-byte sequence "fen zhi" (the
 *               fraction/percent marker) is skipped as a whole.
 *               NOTE(review): the fixed two-byte stepping presumably
 *               assumes a double-byte encoding such as GB2312 -
 *               confirm the encoding this file is compiled with.
 *
 *  Parameters : sWord: the word (NUL-terminated)
 *
 *  Returns    : true if every unit is accepted (an empty word yields
 *               true), false at the first unit that is not
 *  Author     : Kevin Zhang  
 *  History    : 
 *              1.create 2002-1-26
 *********************************************************************/
bool IsAllChineseNum(char *sWord)
{// Typical input: "5.6 percent of people get up at 8:18 in the morning" written with Chinese numerals
  char sNumChars[]="零○一二两三四五六七八九十廿百千万亿壹贰叁肆伍陆柒捌玖拾佰仟∶·./点";
  char sLeadChars[]="几数第上成";
  char sPair[3];
  const size_t nLen=strlen(sWord);
  unsigned int nPos=0;

  while(nPos<nLen)
  {
	 // The fraction marker is accepted anywhere and skipped in one step.
	 if(strncmp(sWord+nPos,"分之",4)==0)
	 {
		nPos+=4;
		continue;
	 }

	 // Isolate the current two-byte unit as a NUL-terminated string.
	 strncpy(sPair,sWord+nPos,2);
	 sPair[2]='\0';

	 // Accept number chars everywhere, prefix chars only at the start.
	 if(CC_Find(sNumChars,sPair)||(nPos==0&&CC_Find(sLeadChars,sPair)))
	 {
		nPos+=2;
		continue;
	 }
	 return false;
  }
  return true;
}
/*********************************************************************
 *
 *  Func Name  : GetForeignCharCount
 *
 *  Description: 
 *
 *  Parameters : sWord: the word
 *
 *  Returns    : the index value
 *  Author     : Kevin Zhang  
 *  History    : 
 *              1.create 2002-4-4
 *              2.Modify  2002-5-21
 *********************************************************************/
int GetForeignCharCount(char *sWord)
{
  unsigned int nForeignCount,nCount;
  nForeignCount=GetCharCount(TRANS_ENGLISH,sWord);//English char counnts
  nCount=GetCharCount(TRANS_JAPANESE,sWord);//Japan char counnts
  if(nForeignCount<=nCount)
	nForeignCount=nCount;
  nCount=GetCharCount(TRANS_RUSSIAN,sWord);//Russian char counnts
  if(nForeignCount<=nCount)
	nForeignCount=nCount;
  return nForeignCount;
}
/*********************************************************************
 *
 *  Func Name  : GetCharCount
 *
 *  Description: Get the count of chars which are in sWord and also in
 *               sCharSet. A byte with the high bit set starts a
 *               two-byte char, which is looked up as a unit with
 *               CC_Find(); any other byte is a single-byte char and is
 *               looked up with strchr().
 *               A two-byte char is never matched on its lead byte
 *               alone: the former fall-through to strchr() counted
 *               every two-byte char whose lead byte occurred anywhere
 *               in sCharSet.
 *
 *  Parameters : sCharSet: the set of chars to count (NUL-terminated)
 *               sWord:    the word (NUL-terminated)
 * 
 *  Returns    : number of chars of sWord that occur in sCharSet
 *  Author     : Kevin Zhang  
 *  History    : 
 *              1.create 2002-5-21
 *********************************************************************/
int GetCharCount(char *sCharSet,char *sWord)
{
  const unsigned int nLen=strlen(sWord);//hoisted: sWord is not modified here
  unsigned int  k=0; 
  char tchar[3];
  int nCount=0;
  tchar[2]=0;
  while(k < nLen)
  {
     tchar[0]=sWord[k++];
     tchar[1]=0;
     // Test the high bit through unsigned char so that the result does
     // not depend on whether plain char is signed on this platform.
     if((unsigned char)tchar[0]>=0x80)
     {
         // Two-byte char. If the lead byte is the last byte of the word
         // this reads the terminating NUL and the loop then ends.
         tchar[1]=sWord[k++];
         if(CC_Find(sCharSet, tchar))
             nCount++;
     }
     else if(strchr(sCharSet,tchar[0]))
     {
         // Single-byte char.
         nCount++;
     }
  }
  return nCount;
}
/*********************************************************************
 *
 *  Func Name  : GetForeignType
 *
 *  Description: Return the foreign type of the word: the
 *               transliteration char set (English, Russian, Japanese)
 *               for which GetCharCount() yields the highest count.
 *               On a tie the earlier set in that order wins, so
 *               TT_ENGLISH is the result when no set scores higher
 *               than the English one.
 *
 *  Parameters : sWord: the word (NUL-terminated)
 *
 *  Returns    : TT_ENGLISH, TT_RUSSIAN or TT_JAPANESE
 *  Author     : Kevin Zhang  
 *  History    : 
 *              1.create 2002-4-4
 *              2.Modify  2002-5-21
 *********************************************************************/
int GetForeignType(char *sWord)
{
  unsigned int nType=TT_ENGLISH;
  unsigned int nBest=GetCharCount(TRANS_ENGLISH,sWord);//English char count

  const unsigned int nRussian=GetCharCount(TRANS_RUSSIAN,sWord);//Russian char count
  if(nRussian>nBest)
  {
	  nBest=nRussian;
	  nType=TT_RUSSIAN;
  }

  const unsigned int nJapanese=GetCharCount(TRANS_JAPANESE,sWord);//Japanese char count
  if(nJapanese>nBest)
  {
	  nType=TT_JAPANESE;
  }
  return nType;
}
/*********************************************************************
 *
 *  Func Name  : PostfixSplit
 *
 *  Description: Split a word into its stem and its postfix. The
 *               multi-char postfixes of POSTFIX_MUTIPLE are tried
 *               first, in table order; if none matches, the last two
 *               bytes of the word are looked up in POSTFIX_SINGLE.
 *               Candidates longer than the word itself are skipped, so
 *               nothing in front of sWord is ever read.
 *
 *  Parameters : sWord:    the word to split (NUL-terminated)
 *               sWordRet: receives the word without its postfix; needs
 *                         room for strlen(sWord)+1 bytes
 *               sPostfix: receives the postfix, or an empty string if
 *                         none was found; needs room for 9 bytes (the
 *                         size of one POSTFIX_MUTIPLE entry)
 *
 *  Returns    : always true
 *********************************************************************/
bool PostfixSplit(char *sWord, char *sWordRet, char *sPostfix)
{
	char sSinglePostfix[]=POSTFIX_SINGLE;
	char sMultiPostfix[][9]=POSTFIX_MUTIPLE;
	size_t nPostfixLen=0,nWordLen=strlen(sWord);
	int i=0;

	// Try the multi-char postfixes; the table ends with an empty entry.
	for(i=0;sMultiPostfix[i][0]!=0;i++)
	{
		size_t nCandLen=strlen(sMultiPostfix[i]);
		// A candidate longer than the word cannot match, and comparing
		// it would start before the beginning of sWord.
		if(nCandLen<=nWordLen&&strncmp(sWord+nWordLen-nCandLen,sMultiPostfix[i],nCandLen)==0)
			break;
	}
	// Either the matching entry or the empty terminating entry.
	strcpy(sPostfix,sMultiPostfix[i]);
	nPostfixLen=strlen(sMultiPostfix[i]);//Get the length of place postfix

	// Fall back to a single two-byte postfix, provided the word is long
	// enough to have one.
	if(nPostfixLen==0&&nWordLen>=2)
	{
		memcpy(sPostfix,sWord+nWordLen-2,2);
		sPostfix[2]=0;
		if(CC_Find(sSinglePostfix,sPostfix))
			nPostfixLen=2;
	}
	
	// nPostfixLen<=nWordLen holds on every path, so this cannot underflow.
	memcpy(sWordRet,sWord,nWordLen-nPostfixLen);
	sWordRet[nWordLen-nPostfixLen]=0;//Get the place name which have erasing the postfix
	sPostfix[nPostfixLen]=0;
    return true;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -