hmm.cpp
out.close() ;
}
void Hmm::ReadSequence( ifstream& in, vector<int>& sequence )
{
cout<<"Now read the observation sequence from file"<<endl ;
int iTemp ;
if( !in.is_open() )
{
cerr<<"Wrong file stream"<<endl ;
exit( EXIT_FAILURE ) ;
}
//read integers until extraction fails; this also stops cleanly at the end of file
while( in>>iTemp )
{
sequence.push_back( iTemp ) ;
}
}
void Hmm::ReadSequence( FILE* pFile, int*& sequence, int& iNum )
{
if( pFile == NULL )
{
cerr<<"Wrong file stream"<<endl ;
exit( EXIT_FAILURE ) ;
}
iNum = 0 ;
int iSize = 1 ;
sequence = (int*)malloc( iSize*sizeof(int) ) ;//start with room for one item and grow on demand
int iTemp ;
//read integers until fscanf can no longer match one; checking the return value avoids
//reading a spurious extra item the way a feof() test before the read would
while( fscanf( pFile, "%d", &iTemp ) == 1 )
{
if( iNum >= iSize )
{
//double the buffer when it is full
iSize = 2*iSize ;
int* pNew = (int*)realloc( sequence, iSize*sizeof(int) ) ;
if( pNew == NULL )
{
cerr<<"Error in reallocating memory"<<endl ;
free( sequence ) ;
exit( EXIT_FAILURE ) ;
}
sequence = pNew ;
}
sequence[iNum] = iTemp ;
iNum++ ;
}
}
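//A minimal usage sketch for the two ReadSequence overloads (an illustration, not part of the
//original project): "hmm" is an Hmm object assumed to be constructed elsewhere and "seq.txt"
//is a hypothetical file of whitespace-separated integer observation symbols.
static void ReadSequenceExample( Hmm& hmm )
{
    //C++ stream overload
    ifstream in( "seq.txt" ) ;
    vector<int> sequence ;
    hmm.ReadSequence( in, sequence ) ;
    in.close() ;
    //C stdio overload; the caller owns the malloc'ed buffer
    FILE* pFile = fopen( "seq.txt", "r" ) ;
    int* pSequence = NULL ;
    int iNum = 0 ;
    hmm.ReadSequence( pFile, pSequence, iNum ) ;
    fclose( pFile ) ;
    free( pSequence ) ;
}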
/********************************the input-output function end ******************************************/
/********************************the core functions of Hmm **********************************************/
double Hmm::Forward( int T, vector<int>& O )
{
//to check the observation sequence
if( T == 0 )
return FALSEVALUE ;
if( alpha != NULL )
{
//alpha already holds results for a previous observation sequence,
//so free that memory before allocating for the new one
dFreeMatrix( alpha, 0, iPreT-1, 0, N-1 ) ;/*the fourth parameter is unused here since the third one is always 0*/
}
//allocate memory
alpha = dMatrix( 0, T-1, 0, N-1 ) ;
//initialize
for( int t=0 ; t<T ; t++ )
for( int j=0 ; j<N ; j++ )
alpha[t][j] = 0.0 ;
//set the iPreT with the current value
iPreT = T ;
//***************this is the forward procedure below****************************************/
double sum = 0.0 ;
double dProbability = 0.0 ;
//step one: initialize
for( int i=0 ; i<N ; i++ )
{
alpha[0][i] = pi[i] * B[i][O[0]] ;
}
//step two : induction
for( int t=1 ; t<T ; t++ )
{
for( int j=0 ; j<N ; j++ )
{
sum = 0.0 ;
for( int i=0 ; i<N ; i++ )
{
sum += alpha[t-1][i] * A[i][j] ;
}
alpha[t][j] = sum * B[j][O[t]] ;
}
}
//step three : termination
for( int i=0 ; i<N ; i++ )
{
dProbability += alpha[T-1][i] ;
}
return dProbability ;
}
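//For reference, the recursion implemented above (the standard forward algorithm):
//  alpha[0][i] = pi[i] * b_i(O[0])
//  alpha[t][j] = ( sum_i alpha[t-1][i] * a_ij ) * b_j(O[t]),  t = 1..T-1
//  P(O|lambda) = sum_i alpha[T-1][i]
//The cost is O(N*N*T) operations instead of the O(N^T) of enumerating every state path.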
double Hmm::Forward( int T, int* O )
{
//to check the observation sequence
if( T == 0 )
return FALSEVALUE ;
if( alpha != NULL )
{
//alpha already holds results for a previous observation sequence,
//so free that memory before allocating for the new one
dFreeMatrix( alpha, 0, iPreT-1, 0, N-1 ) ;/*the fourth parameter is unused here since the third one is always 0*/
}
//allocate memory
alpha = dMatrix( 0, T-1, 0, N-1 ) ;
//initialize
for( int t=0 ; t<T ; t++ )
for( int j=0 ; j<N ; j++ )
alpha[t][j] = 0.0 ;
//set the iPreT with the current value
iPreT = T ;
//***************this is the forward procedure below****************************************/
double sum = 0.0 ;
double dProbability = 0.0 ;
//step one: initialize
for( int i=0 ; i<N ; i++ )
{
alpha[0][i] = pi[i] * B[i][O[0]] ;
}
//step two : induction
for( int t=1 ; t<T ; t++ )
{
for( int j=0 ; j<N ; j++ )
{
sum = 0.0 ;
for( int i=0 ; i<N ; i++ )
{
sum += alpha[t-1][i] * A[i][j] ;
}
alpha[t][j] = sum * B[j][O[t]] ;
}
}
//step three : termination
for( int i=0 ; i<N ; i++ )
{
dProbability += alpha[T-1][i] ;
}
return dProbability ;
}
double Hmm::ForwardNormalized( int T, vector<int>& O )
{
//to check the observation sequence
if( T == 0 )
return FALSEVALUE ;
if( alpha != NULL )
{
//alpha already holds results for a previous observation sequence,
//so free that memory before allocating for the new one
dFreeMatrix( alpha, 0, iPreT-1, 0, N-1 ) ;/*the fourth parameter is unused here since the third one is always 0*/
}
//allocate memory
alpha = dMatrix( 0, T-1, 0, N-1 ) ;
//initialize
for( int t=0 ; t<T ; t++ )
for( int j=0 ; j<N ; j++ )
alpha[t][j] = 0.0 ;
//set the iPreT with the current value
iPreT = T ;
//***************this is the forward normalized procedure below*****************************/
double sum = 0.0 ;
double dLogProbability = 0.0 ;
//the scale factors used to normalize alpha at each time step; the log-likelihood is recovered from their logs at the end
vector<double> Scale ;
Scale.resize( T ) ;
//step one: initialize
for( int i=0 ; i<N ; i++ )
{
alpha[0][i] = pi[i] * B[i][O[0]] ;
Scale[0] += alpha[0][i] ;
}
//normalize
for( int i=0 ; i<N ; i++ )
alpha[0][i] /= Scale[0] ;
//step two : induction
for( int t=1 ; t<T ; t++ )
{
Scale[t] = 0.0 ;
for( int j=0 ; j<N ; j++ )
{
sum = 0.0 ;
for( int i=0 ; i<N ; i++ )
{
sum += alpha[t-1][i] * A[i][j] ;
}
alpha[t][j] = sum * B[j][O[t]] ;
Scale[t] += alpha[t][j] ;
}
//normalize
for( int j=0 ; j<N ; j++ )
alpha[t][j] /= Scale[t] ;
}
//step three : termination
/***************************************************************************************************/
//with this scaling, Scale[t] equals P( O[t] | O[0]..O[t-1] ), so the sequence probability is the
//product of all the scale factors and the log-likelihood is the sum of their logs (this is also
//what the reference code returns); the log is used because the raw probability underflows to zero
//for long observation sequences
/***************************************************************************************************/
for( int t=0 ; t<T ; t++ )
dLogProbability += log( Scale[t] ) ;
return dLogProbability ;
}
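//For reference, the scaling used above: Scale[t] is the sum of the alpha column at time t before
//normalization, which equals P( O[t] | O[0]..O[t-1] ). Therefore
//  P(O|lambda) = Scale[0] * Scale[1] * ... * Scale[T-1]
//  log P(O|lambda) = sum_t log( Scale[t] )
//and the normalized version returns this sum instead of the raw probability, which would
//underflow to zero for long observation sequences.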
double Hmm::ForwardNormalized( int T, int* O )
{
//to check the observation sequence
if( T == 0 )
return FALSEVALUE ;
if( alpha != NULL )
{
//alpha already holds results for a previous observation sequence,
//so free that memory before allocating for the new one
dFreeMatrix( alpha, 0, iPreT-1, 0, N-1 ) ;/*the fourth parameter is unused here since the third one is always 0*/
}
//allocate memory
alpha = dMatrix( 0, T-1, 0, N-1 ) ;
//initialize
for( int t=0 ; t<T ; t++ )
for( int j=0 ; j<N ; j++ )
alpha[t][j] = 0.0 ;
//set the iPreT with the current value
iPreT = T ;
//***************this is the forward normalized procedure below*****************************/
double sum = 0.0 ;
double dLogProbability = 0.0 ;
//the scale factors used to normalize alpha at each time step; the log-likelihood is recovered from their logs at the end
vector<double> Scale ;
Scale.resize( T ) ;
//step one: initialize
for( int i=0 ; i<N ; i++ )
{
alpha[0][i] = pi[i] * B[i][O[0]] ;
Scale[0] += alpha[0][i] ;
}
//normalize
for( int i=0 ; i<N ; i++ )
alpha[0][i] /= Scale[0] ;
//step two : induction
for( int t=1 ; t<T ; t++ )
{
Scale[t] = 0.0 ;
for( int j=0 ; j<N ; j++ )
{
sum = 0.0 ;
for( int i=0 ; i<N ; i++ )
{
sum += alpha[t-1][i] * A[i][j] ;
}
alpha[t][j] = sum * B[j][O[t]] ;
Scale[t] += alpha[t][j] ;
}
//normalize
for( int j=0 ; j<N ; j++ )
alpha[t][j] /= Scale[t] ;
}
//step three : termination
/***************************************************************************************************/
//with this scaling, Scale[t] equals P( O[t] | O[0]..O[t-1] ), so the sequence probability is the
//product of all the scale factors and the log-likelihood is the sum of their logs (this is also
//what the reference code returns); the log is used because the raw probability underflows to zero
//for long observation sequences
/***************************************************************************************************/
for( int t=0 ; t<T ; t++ )
dLogProbability += log( Scale[t] ) ;
return dLogProbability ;
}
double Hmm::Backward( int T, vector<int>& O )
{
//to check the observation sequence
if( T == 0 )
return FALSEVALUE ;
if( beta != NULL )
{
//beta already holds results for a previous observation sequence,
//so free that memory before allocating for the new one
dFreeMatrix( beta, 0, iPreT-1, 0, N-1 ) ;/*the fourth parameter is unused here since the third one is always 0*/
}
//allocate memory
beta = dMatrix( 0, T-1, 0, N-1 ) ;
//initialize
for( int t=0 ; t<T ; t++ )
for( int i=0 ; i<N ; i++ )
beta[t][i] = 0.0 ;
//set the iPreT with the current value
iPreT = T ;
//*********************** this is the backward procedure below***************************//
double sum = 0.0 ;
//step one : initialize
for( int i=0 ; i<N ; i++ )
beta[T-1][i] = 1 ;
//step two : induction
for( int t=T-2 ; t>=0 ; t-- )
{
for( int i=0 ; i<N ; i++ )
{
sum = 0.0 ;
for( int j=0 ; j<N ; j++ )
{
sum += A[i][j] * B[j][O[t+1]] * beta[t+1][j] ;
}
beta[t][i] = sum ;
}
}
//step three : termination
double dProbability = 0.0 ;
for( int i=0 ; i<N ; i++ )
dProbability += pi[i] * B[i][O[0]] * beta[0][i] ;
return dProbability ;
}
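//For reference, the recursion implemented above (the standard backward algorithm):
//  beta[T-1][i] = 1
//  beta[t][i] = sum_j a_ij * b_j(O[t+1]) * beta[t+1][j],  t = T-2..0
//  P(O|lambda) = sum_i pi[i] * b_i(O[0]) * beta[0][i]
//so Forward() and Backward() should return the same probability for the same model and sequence.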
double Hmm::Backward( int T, int* O )
{
//to check the observation sequence
if( T == 0 )
return FALSEVALUE ;
if( beta != NULL )
{
//beta already holds results for a previous observation sequence,
//so free that memory before allocating for the new one
dFreeMatrix( beta, 0, iPreT-1, 0, N-1 ) ;/*the fourth parameter is unused here since the third one is always 0*/
}
//allocate memory
beta = dMatrix( 0, T-1, 0, N-1 ) ;
//initialize
for( int t=0 ; t<T ; t++ )
for( int i=0 ; i<N ; i++ )
beta[t][i] = 0.0 ;
//set the iPreT with the current value
iPreT = T ;
//*********************** this is the backward procedure below***************************//
double sum = 0.0 ;
//step one : initialize
for( int i=0 ; i<N ; i++ )
beta[T-1][i] = 1 ;
//step two : induction
for( int t=T-2 ; t>=0 ; t-- )
{
for( int i=0 ; i<N ; i++ )
{
sum = 0.0 ;
for( int j=0 ; j<N ; j++ )
{
sum += A[i][j] * B[j][O[t+1]] * beta[t+1][j] ;
}
beta[t][i] = sum ;
}
}
//step three : termination
double dProbability = 0.0 ;
for( int i=0 ; i<N ; i++ )
dProbability += pi[i] * B[i][O[0]] * beta[0][i] ;
return dProbability ;
}
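//A small consistency check (a sketch under assumptions, not part of the original file): for any
//model, the plain forward and backward passes compute the same P(O|lambda) up to rounding error.
//"hmm" and the observation sequence "O" are assumed to be prepared elsewhere.
static bool CheckForwardBackward( Hmm& hmm, vector<int>& O )
{
    int T = (int)O.size() ;
    double dForward = hmm.Forward( T, O ) ;
    double dBackward = hmm.Backward( T, O ) ;
    //use a relative tolerance, since both values can be extremely small
    return fabs( dForward - dBackward ) <= 1e-9 * ( fabs( dForward ) + fabs( dBackward ) ) ;
}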
double Hmm::BackwardNormalized( int T, vector<int>& O )
{
//to check the observation sequence
if( T == 0 )
return FALSEVALUE ;
if( beta != NULL )
{
//beta already holds results for a previous observation sequence,
//so free that memory before allocating for the new one
dFreeMatrix( beta, 0, iPreT-1, 0, N-1 ) ;/*the fourth parameter is unused here since the third one is always 0*/
}
//allocate memory
beta = dMatrix( 0, T-1, 0, N-1 ) ;
//initialize
for( int t=0 ; t<T ; t++ )
for( int i=0 ; i<N ; i++ )
beta[t][i] = 0.0 ;
//set the iPreT with the current value
iPreT = T ;
//*********************** this is the backward procedure below***************************//
double sum = 0.0 ;
//the scale factors used to normalize the beta column at each time step
vector<double> Scale ;
Scale.resize( T ) ;
//step one : initialize
for( int i=0 ; i<N ; i++ )
beta[T-1][i] = 1 ;
//normalize
for( int i=0 ; i<N ; i++ )
beta[T-1][i] /= N ;
//step two : induction
for( int t=T-2 ; t>=0 ; t-- )
{
Scale[t] = 0.0 ;
for( int i=0 ; i<N ; i++ )
{
sum = 0.0 ;
for( int j=0 ; j<N ; j++ )
{
sum += A[i][j] * B[j][O[t+1]] * beta[t+1][j] ;
}
beta[t][i] = sum ;
Scale[t] += beta[t][i] ;
}
//normalize
for( int i=0 ; i<N ; i++ )
beta[t][i] /= Scale[t] ;
}
//step three : termination
double dLogProbability = 0.0 ;
double temp = 0.0 ;
for( int i=0 ; i<N ; i++ )
temp += pi[i] * B[i][O[0]] * beta[0][i] ;
dLogProbability = log( temp ) ;
return dLogProbability ;
}
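//Note on the scaling above (an observation, not from the original comments): these backward scale
//factors are computed independently of the forward pass. In Rabiner's formulation beta is usually
//rescaled with the same Scale[t] produced by the forward pass, which keeps the gamma and xi terms
//used in Baum-Welch re-estimation properly normalized.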
double Hmm::BackwardNormalized( int T, int* O )
{
//to check the observation sequence
if( T == 0 )
return FALSEVALUE ;
if( beta != NULL )
{
//beta is not empty indicate that this is a new observation sequence
//then reallocate the memory is needed
//fisrt free the memory of beta
dFreeMatrix( beta, 0, iPreT-1, 0, N-1 ) ;/*the fourth parament is no use here since the third one is always 0*/
}
//allocate memory
beta = dMatrix( 0, T-1, 0, N-1 ) ;
//initialize
for( int t=0 ; t<T ; t++ )
for( int i=0 ; i<N ; i++ )
beta[t][i] = 0.0 ;
//set the iPreT with the current value
iPreT = T ;
//*********************** this is the backward procedure below***************************//
double sum = 0.0 ;
//the scale factors used to normalize the beta column at each time step
vector<double> Scale ;
Scale.resize( T ) ;
//step one : initialize
for( int i=0 ; i<N ; i++ )
beta[T-1][i] = 1 ;
//normalize
for( int i=0 ; i<N ; i++ )
beta[T-1][i] /= N ;
//step two : induction
for( int t=T-2 ; t>=0 ; t-- )
{
Scale[t] = 0.0 ;
for( int i=0 ; i<N ; i++ )
{
sum = 0.0 ;
for( int j=0 ; j<N ; j++ )
{
sum += A[i][j] * B[j][O[t+1]] * beta[t+1][j] ;
}
beta[t][i] = sum ;
Scale[t] += beta[t][i] ;
}
//normalize
for( int i=0 ; i<N ; i++ )
beta[t][i] /= Scale[t] ;
}
//step three : termination
double dLogProbability = 0.0 ;
double temp = 0.0 ;
for( int i=0 ; i<N ; i++ )
temp += pi[i] * B[i][O[0]] * beta[0][i] ;
//dLogProbability = log( temp ) ;
dLogProbability = log( Scale[0] ) ;
return dLogProbability ;
}
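//A small check of the scaled forward pass (a sketch under assumptions, not part of the original
//file): for sequences short enough that Forward() does not underflow, the log of its result should
//match the value returned by ForwardNormalized(). "hmm" and "O" are assumed to be prepared elsewhere.
static bool CheckScaledForward( Hmm& hmm, vector<int>& O )
{
    int T = (int)O.size() ;
    double dLogPlain = log( hmm.Forward( T, O ) ) ;
    double dLogScaled = hmm.ForwardNormalized( T, O ) ;
    return fabs( dLogPlain - dLogScaled ) <= 1e-6 ;
}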
double Hmm::Viterbi( int T, vector<int>& O, vector<int>& S )
{
//to check the observation sequence
if( T == 0 )
return FALSEVALUE ;
if( delta != NULL )
{
//delta already holds results for a previous observation sequence,
//so free that memory before allocating for the new one
dFreeMatrix( delta, 0, iPreT-1, 0, N-1 ) ;/*the fourth parameter is unused here since the third one is always 0*/
}
//allocate memory
delta = dMatrix( 0, T-1, 0, N-1 ) ;
//set iPreT to the current sequence length so the matrix can be freed correctly next time
iPreT = T ;
//allocate the memory for the state sequence S
S.resize( T ) ;
//***************** this is the viterbi algorithm********************************/
//the matrix for back trace
int** Psi ;
//allocate memory for it
Psi = iMatrix( 0, T-1, 0, N-1 ) ;
//step one : initialization
for( int i=0 ; i<N ; i++ )
{
delta[0][i] = pi[i] * B[i][O[0]] ;
Psi[0][i] = 0 ;
}
//step two : induction
double dMax = 0.0 ;
double dValue = 0.0 ;
for( int t=1 ; t<T ; t++ )
{
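//The listing breaks off inside the Viterbi induction loop above. A standard continuation is
//sketched below for reference only; it is not the original author's code, and iFreeMatrix is
//assumed to exist as the integer counterpart of dFreeMatrix.
/*
        for( int j=0 ; j<N ; j++ )
        {
            //pick the best predecessor state for state j at time t
            dMax = 0.0 ;
            int iArgMax = 0 ;
            for( int i=0 ; i<N ; i++ )
            {
                dValue = delta[t-1][i] * A[i][j] ;
                if( dValue > dMax )
                {
                    dMax = dValue ;
                    iArgMax = i ;
                }
            }
            delta[t][j] = dMax * B[j][O[t]] ;
            Psi[t][j] = iArgMax ;
        }
    }
    //step three : termination - choose the most probable final state
    double dProbability = 0.0 ;
    S[T-1] = 0 ;
    for( int i=0 ; i<N ; i++ )
    {
        if( delta[T-1][i] > dProbability )
        {
            dProbability = delta[T-1][i] ;
            S[T-1] = i ;
        }
    }
    //step four : back-trace the optimal state sequence through Psi
    for( int t=T-2 ; t>=0 ; t-- )
        S[t] = Psi[t+1][ S[t+1] ] ;
    iFreeMatrix( Psi, 0, T-1, 0, N-1 ) ;
    return dProbability ;
}
*/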