⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 main.cpp

📁 经典的HMM算法的代码!以在文本中的应用为例
💻 CPP
字号:
#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include "Hmm.h"

using namespace std ;

// Number of entries in Tags[] below. The disabled training code in main()
// uses it both as the end bound of Tags[] and as the HMM state count N.
const int MaxTagNum = 44 ;

// One (word, tag) training observation expressed as integer IDs.
// Only the disabled training code in main() currently creates these.
struct WordTagPair
{
	int iWordID ;	// index of the word in the word list loaded from the lexicon
	int iTagID ;	// index of the tag in Tags[]
} ;

// Tag names, indexed by tag ID; main() prints Tags[state] for every state
// returned by the Viterbi decoder.
// The entries are in ascending order under plain character comparison. Keep
// them that way: the disabled training code in main() binary-searches this
// array (lower_bound), which requires a sorted range.
// NOTE(review): these look like the Penn Treebank part-of-speech tags -- confirm.
string Tags[] = { "#",
	"$",
	"''",
	"(",
	")",
	",",
	".",
	":",
	"CC",
	"CD",
	"DT",
	"EX",
	"FW",
	"IN",
	"JJ",
	"JJR",
	"JJS",
	"MD",
	"NN",
	"NNP",
	"NNPS",
	"NNS",
	"PDT",
	"POS",
	"PRP",
	"PRP$",
	"RB",
	"RBR",
	"RBS",
	"RP",
	"SYM",
	"TO",
	"UH",
	"VB",
	"VBD",
	"VBG",
	"VBN",
	"VBP",
	"VBZ",
	"WDT",
	"WP",
	"WP$",
	"WRB",
	"``",
} ;

// Reads the whitespace-separated words stored in the file `path` and appends
// them to `words` in file order.
// Returns false when the file cannot be opened, true otherwise.
static bool LoadWordList( const char* path, std::vector<std::string>& words )
{
	std::ifstream in( path ) ;
	if( !in.is_open() )
		return false ;

	std::string word ;
	// Loop on the extraction itself. Looping on in.good() and extracting inside
	// the body appends one stale extra entry when the read at end-of-file fails.
	while( in >> word )
		words.push_back( word ) ;
	return true ;
}

// Splits `sentence` on whitespace and appends, for every word, its index in
// `lexicon` to `ids`.
// Returns false (after naming the word on cerr) as soon as a word is not in
// the lexicon, so that no out-of-range observation index is ever produced.
static bool EncodeSentence( const std::string& sentence,
                            const std::vector<std::string>& lexicon,
                            std::vector<int>& ids )
{
	std::istringstream words( sentence ) ;
	std::string word ;
	while( words >> word )
	{
		// Linear search: nothing in the active code guarantees the lexicon is sorted.
		const std::vector<std::string>::const_iterator pos =
			std::find( lexicon.begin(), lexicon.end(), word ) ;
		if( pos == lexicon.end() )
		{
			std::cerr<<"The word \""<<word<<"\" is not in the word list"<<std::endl ;
			return false ;
		}
		ids.push_back( static_cast<int>( pos - lexicon.begin() ) ) ;
	}
	return true ;
}

// Demo driver: loads the lexicon and a stored HMM, then exercises sequence
// generation, Baum-Welch, Viterbi, Forward and Backward on a test sentence.
// Returns 0 on success and EXIT_FAILURE when the lexicon cannot be opened or
// the test sentence cannot be encoded.
int main()
{
	std::cout<<"Hello world for HmmOne!"<<std::endl ;
	std::cout<<"Test for viterbi algorithm!"<<std::endl ;

	Hmm theHmm ;

	// The whole word list, initialised from Lexicon.txt.
	std::vector<std::string> WordList ;
	if( !LoadWordList( "Lexicon.txt", WordList ) )
	{
		std::cerr<<"Can not open the file of Lexicon.txt to load the word list"<<std::endl ;
		return EXIT_FAILURE ;
	}

	// Disabled training code, kept for reference only: it estimates the HMM
	// parameters from train01.txt by counting tag/tag and tag/word frequencies.
	// It does not compile as written inside this function (it declares theHmm
	// again, uses `in`/`sTemp` which are no longer locals here, and relies on a
	// loop variable `i` leaking out of its for statement).
/*
	//load the training file to memory struct
	vector<WordTagPair> WordTagPairArray ;
	in.clear() ;
	in.open( "train01.txt" ) ;
	//in.clear() ;
	if( !in.is_open() )
	{
		cerr<<"Can not open the file of train01.txt to load the word-tag pair list"<<endl ;
		exit( EXIT_FAILURE ) ;
	}
	string word ;
	string tag ;
	WordTagPair tempWordTagPair ;
	int WordTagPairNum = 0 ;
	while( in.good() )
	{
		getline( in, sTemp,'\n' ) ;
		istringstream inStream( sTemp ) ;
		inStream>>word>>tag ;
		//cout<<"the word is : "<<word<<endl ;
		//cout<<"the tag is : "<<tag<<endl ;
		vector<string>::iterator Iter = lower_bound( WordList.begin(), WordList.end(), word ) ;
		tempWordTagPair.iWordID = Iter - WordList.begin() ;
		int iTag = -1 ;
		string* pBegin = Tags ;
		string* pEnd = Tags + MaxTagNum ;
		string* pResult = NULL ;
		pResult = lower_bound( pBegin, pEnd, tag ) ;
		if( pResult )
			iTag = pResult - pBegin ;
		//////////////////////////// the binary search using c programme///////////////////////////
		int iLow = 0 ; 
		int iHigh = MaxTagNum ;
		while( iLow<=iHigh )
		{
			int iMid = ( iLow + iHigh ) / 2 ;
			if( Tags[iMid] == tag )
			{
				iTag = iMid ;
				break ;
			}
			else if( Tags[iMid] < tag )
				iLow = iMid + 1 ;
			else
				iHigh = iMid - 1 ;
		}
		///////////////////////////////////////////////////////////////////////////////////////////
		tempWordTagPair.iTagID = iTag ;
		WordTagPairArray.push_back( tempWordTagPair ) ;
		WordTagPairNum++ ;
		//cout<<"the number is : "<<WordTagPairNum <<endl ;
	}
	in.close() ;

	//initialize some variables
	int N = MaxTagNum ;
	int M = WordList.size() ;
	int iSeed = 1 ;
	Hmm theHmm( N, M, iSeed ) ;

	//statistic the frequency
	int* TagFreq = new int[N+1] ;
	int** TagTagFreq = iMatrix( 0, N, 0, N ) ;
	int** TagWordFreq ;
	TagWordFreq = iMatrix( 0, N, 0 , M ) ;
	//firstly initialize them
	for( int i=0 ; i<N+1 ; i++ )
		for( int j=0 ; j<N+1 ; j++ )
			TagTagFreq[i][j] = 0 ;
	for( i=0 ; i<N+1 ; i++ )
		TagFreq[i] = 0 ;
	for( i=0 ; i<N+1 ; i++ )
		for( int j=0 ; j<M+1 ; j++ )
			TagWordFreq[i][j] = 0 ;
	//secondly statistic
	double tagFreqSum = 0.0 ;
	for( i=0 ; i<(int)WordTagPairArray.size()-1 ; i++ )
	{
		TagFreq[WordTagPairArray[i].iTagID]++;
		TagTagFreq[WordTagPairArray[i].iTagID][WordTagPairArray[i+1].iTagID]++;
		TagWordFreq[WordTagPairArray[i].iTagID][WordTagPairArray[i].iWordID]++;
	}
	for( i=0 ; i<N ; i++ )
		tagFreqSum += TagFreq[i] ;

	//set the value of the matrix
	for (i = 0; i < theHmm.N; i++) 
	{ 
		for ( int j = 0; j < theHmm.N; j++) 
		{
			theHmm.A[i][j]=(double)TagTagFreq[i][j]/TagFreq[i]; 
		}
	}
	for ( int j = 0; j < theHmm.N ; j++) 
	{ 
		for ( int k = 0; k < theHmm.M ; k++) 
		{
			theHmm.B[j][k]=(double)TagWordFreq[j][k]/TagFreq[j];
		}
	}
	for ( i = 0; i < theHmm.N; i++) 
		theHmm.pi[i]=(double)TagFreq[i]/tagFreqSum;
*/
	// Load the previously stored model instead of training one.
	theHmm.ReadHmm( "HmmData.txt" ) ;

	// Encode the test sentence as a sequence of word-list indices.
	const std::string test_string = "I love you" ;
	std::vector<int> O ;	// observation sequence (word indices)
	if( !EncodeSentence( test_string, WordList, O ) )
		return EXIT_FAILURE ;
	if( O.empty() )
	{
		std::cerr<<"The test sentence contains no words"<<std::endl ;
		return EXIT_FAILURE ;
	}
	// Sequence length follows the sentence instead of being hard-coded.
	// Left non-const because it is handed to Hmm member functions whose
	// parameter types are not visible from this file.
	int T = static_cast<int>( O.size() ) ;

	////////test generate sequence//////////////
	int t = 15 ;
	int seed = 108 ;
	std::vector<int> theSymbol ;
	std::vector<int> theState ;
	theHmm.GenerateSequence( seed, t, theSymbol, theState ) ;
	// Never read past what GenerateSequence actually produced.
	const std::size_t generated = std::min( theSymbol.size(), theState.size() ) ;
	for( std::size_t i=0 ; i<generated && i<static_cast<std::size_t>( t ) ; i++ )
	{
		const int symbol = theSymbol[i] ;
		// A symbol outside the word list must not be used as an index.
		const bool known = symbol>=0 && static_cast<std::size_t>( symbol )<WordList.size() ;
		std::cout<<"the "<<(i+1)<<" word is : "
			<<( known ? WordList[symbol] : std::string( "<unknown>" ) )
			<<" according to state "<<theState[i]<<std::endl ;
	}
	////////////////end/////////////////////////

	/////////test baum-welch here//////////////
	// Runs before Viterbi/Forward/Backward, exactly as in the original order.
	double probInit = 0.0 ;
	double probFinal = 0.0 ;
	theHmm.BaumWelch( T, O, probInit, probFinal ) ;
	std::cout<<"the initial probability is : "<<probInit<<std::endl ;
	std::cout<<"the final probability is : "<<probFinal<<std::endl ;
	///////////////end ////////////////////////

	// Decode the state sequence and print the tag name of every state.
	// The value returned by Viterbi was never reported, so it is not stored.
	std::vector<int> S ;
	theHmm.Viterbi( T, O, S ) ;
	const int tagCount = static_cast<int>( sizeof( Tags ) / sizeof( Tags[0] ) ) ;
	for( std::vector<int>::const_iterator it=S.begin() ; it!=S.end() ; ++it )
	{
		if( *it>=0 && *it<tagCount )
			std::cout<<Tags[*it]<<" " ;
		else
			std::cout<<"? " ;	// state has no entry in Tags[]
	}
	std::cout<<std::endl ;

	double probForward = theHmm.Forward( T, O ) ;
	std::cout<<"the probability of forward algorithm is : "<<probForward<<std::endl ;
	double probBackward = theHmm.Backward( T, O ) ;
	std::cout<<"the probability of backward algorithm is : "<<probBackward<<std::endl ;

	//string filename( "HmmData.txt" ) ;
	//theHmm.WriteHmm( filename ) ;

	return 0 ;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -