hmm.cpp
#include "..\\include\\hmm.h"
#include <fstream.h>
#include <stdlib.h>
#include <iostream.h>
#include <iomanip.h>
#include <math.h>
/*
* Class constructor. Initializes an HMM given
* number of states and output vocabulary size.
*/
HMM::HMM(int numOfStates, int vocabSize)
{
int i;
// make sure that the number of states and size of
// vocab is at least one
if(numOfStates < 1)
numOfStates = 1;
if(vocabSize < 1)
vocabSize = 1;
numStates = numOfStates;
sigmaSize = vocabSize;
dimensions = -1;
pi = new double[numStates];
a = new double*[numStates];
for(i = 0; i < numStates; i++)
a[i] = new double[numStates];
b = new double*[numStates];
for(i = 0; i < numStates; i++)
b[i] = new double[sigmaSize];
}
/*
* Class constructor. Initializes an HMM given a
* number of states, output vocabulary size, and
* number of dimensions for its observation symbols.
*/
HMM::HMM(int numOfStates, int vocabSize, int d)
{
int i;
// make sure that the number of states, size of
// vocab, and number of dimensions are at least one
if(numOfStates < 1)
numOfStates = 1;
if(vocabSize < 1)
vocabSize = 1;
if(d < 1)
d = 1;
numStates = numOfStates;
sigmaSize = vocabSize;
dimensions = d;
pi = new double[numStates];
a = new double*[numStates];
for(i = 0; i < numStates; i++)
a[i] = new double[numStates];
b = new double*[numStates];
for(i = 0; i < numStates; i++)
b[i] = new double[sigmaSize];
V = new double*[sigmaSize];
for(i = 0; i < sigmaSize; i++)
V[i] = new double[d];
}
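/*
 * Note: the constructors above only allocate storage; pi, a, and b are left
 * uninitialized. An illustrative sketch (not part of the original API) of
 * seeding a freshly constructed model with uniform probabilities, assuming
 * the members are accessible from the caller as they are in load() below:
 *
 *   HMM hmm(numOfStates, vocabSize);
 *   for(int i = 0; i < numOfStates; i++) {
 *       hmm.pi[i] = 1.0 / numOfStates;
 *       for(int j = 0; j < numOfStates; j++)
 *           hmm.a[i][j] = 1.0 / numOfStates;
 *       for(int k = 0; k < vocabSize; k++)
 *           hmm.b[i][k] = 1.0 / vocabSize;
 *   }
 */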
/*
* Class destructor. Frees up the space we allocated on the
* heap for storing our member variables.
*/
HMM::~HMM()
{
int i;
// clean up
delete [] pi;
for(i = 0; i < numStates; i++) {
delete [] a[i];
delete [] b[i];
}
delete [] a;
delete [] b;
if(dimensions != -1) {
for(i = 0; i < sigmaSize; i++)
delete [] V[i];
delete [] V;
}
}
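/*
 * Note: because the class owns raw heap arrays and defines no copy
 * constructor or assignment operator here, copying an HMM by value would
 * make two objects delete the same arrays. A common guard (a suggestion,
 * not part of the original header) is to declare both private in hmm.h
 * without implementing them:
 *
 *   HMM(const HMM&);            // not implemented
 *   HMM& operator=(const HMM&); // not implemented
 */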
/*
* Calculation of Forward variables f(i,t) for state i at time
* t for sequence o with the current HMM parameters.
*/
double** HMM::forwardProc(int* o, int T)
{
double** fwd;
int i, j, t;
fwd = new double*[numStates];
for(i = 0; i < numStates; i++)
fwd[i] = new double[T];
// initialization (time 0)
for(i = 0; i < numStates; i++)
fwd[i][0] = pi[i] * b[i][o[0]];
// induction
for(t = 0; t <= T-2; t++) {
for(j = 0; j < numStates; j++) {
fwd[j][t+1] = 0;
for(i = 0; i < numStates; i++)
fwd[j][t+1] += (fwd[i][t] * a[i][j]);
fwd[j][t+1] *= b[j][o[t+1]];
}
}
return fwd;
}
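/*
 * Example (illustrative sketch, not part of the original API): the sequence
 * probability P(O | lambda) is the sum of the forward variables at the final
 * time step. Assumes an index sequence o of length T and that the members
 * are accessible as they are in load() below; the caller frees the returned
 * table. Note that the unscaled products can underflow for long sequences;
 * no scaling is applied in this implementation.
 *
 *   double** fwd = hmm.forwardProc(o, T);
 *   double prob = 0.0;
 *   for(int i = 0; i < hmm.numStates; i++)
 *       prob += fwd[i][T-1];
 *   for(int i = 0; i < hmm.numStates; i++)
 *       delete [] fwd[i];
 *   delete [] fwd;
 */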
/*
* Calculation of Backward variables bwd(i,t) for state i at time
* t for sequence o with the current HMM parameters.
*/
double** HMM::backwardProc(int* o, int T)
{
double** bwd;
int i, j, t;
bwd = new double*[numStates];
for(i = 0; i < numStates; i++)
bwd[i] = new double[T];
// initialization (time T-1)
for(i = 0; i < numStates; i++)
bwd[i][T-1] = 1;
// induction
for(t = T - 2; t >= 0; t--) {
for(i = 0; i < numStates; i++) {
bwd[i][t] = 0;
for (j = 0; j < numStates; j++)
bwd[i][t] += (bwd[j][t+1] * a[i][j] * b[j][o[t+1]]);
}
}
return bwd;
}
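/*
 * Example (illustrative sketch): combining the backward variables with the
 * initial and emission probabilities at time 0 gives the same sequence
 * probability as the forward procedure, which makes a handy consistency
 * check:
 *
 *   double** bwd = hmm.backwardProc(o, T);
 *   double prob = 0.0;
 *   for(int i = 0; i < hmm.numStates; i++)
 *       prob += hmm.pi[i] * hmm.b[i][o[0]] * bwd[i][0];
 */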
/*
 * Viterbi algorithm for hidden Markov models. Given an
 * observation sequence o, Viterbi finds the most likely
 * state sequence for o on this HMM, along with the weight
 * (negative log probability) of that sequence.
 */
double** HMM::viterbi(int* o, int T)
{
int i, j, t;
int min_state;
double min_weight, weight;
double stateOptProb = 0.0;
int* Q = new int[T];
int** sTable;    // back-pointer table, numStates x T
double** aTable; // accumulated negative log weights, numStates x T
double** answer; // answer[0][0] = best weight, answer[1][0..T-1] = best state sequence
sTable = new int*[numStates];
for(i = 0; i < numStates; i++)
sTable[i] = new int[T];
aTable = new double*[numStates];
for(i = 0; i < numStates; i++)
aTable[i] = new double[T];
answer = new double*[2];
answer[0] = new double[1];
answer[1] = new double[T];
// calculate accumulations and best states for time 0
for(i = 0; i < numStates; i++) {
aTable[i][0] = -1*log(pi[i]) - log(b[i][o[0]]);
sTable[i][0] = 0;
}
// fill up the rest of the tables
for(t = 1; t < T; t++) {
for(j = 0; j < numStates; j++) {
min_weight = aTable[0][t-1] - log(a[0][j]);
min_state = 0;
for(i = 1; i < numStates; i++) {
weight = aTable[i][t-1] - log(a[i][j]);
if(weight < min_weight) {
min_weight = weight;
min_state = i;
}
}
aTable[j][t] = min_weight - log(b[j][o[t]]);
sTable[j][t] = min_state;
}
}
// find minimum value for time T-1
min_weight = aTable[0][T-1];
min_state = 0;
for(i = 1; i < numStates; i++) {
if(aTable[i][T-1] < min_weight) {
min_weight = aTable[i][T-1];
min_state = i;
}
}
// trace back to find the optimized state sequence
Q[T-1] = min_state;
for(t = T-2; t >= 0; t--)
Q[t] = sTable[Q[t+1]][t+1];
// store the answers, free the work tables, and return
answer[0][0] = min_weight;
for(i = 0; i < T; i++)
answer[1][i] = Q[i];
for(i = 0; i < numStates; i++) {
delete [] sTable[i];
delete [] aTable[i];
}
delete [] sTable;
delete [] aTable;
delete [] Q;
return answer;
}
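/*
 * Example (illustrative sketch): reading the Viterbi result. answer[0][0]
 * holds the weight of the best path as a negative log probability (so the
 * probability of the best path is exp(-answer[0][0])), and answer[1][0..T-1]
 * holds the state indices stored as doubles. The caller owns the arrays.
 *
 *   double** res = hmm.viterbi(o, T);
 *   cout << "best path log probability: " << -res[0][0] << endl;
 *   for(int t = 0; t < T; t++)
 *       cout << (int)res[1][t] << " ";
 *   cout << endl;
 *   delete [] res[0];
 *   delete [] res[1];
 *   delete [] res;
 */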
/*
* Prints all the parameters of this HMM. Parameters include initial,
* transition, and output probabilities.
*/
void HMM::print()
{
int i, j, k;
// print the initial probabilities
for(i = 0; i < numStates; i++)
cout << "pi(" << i << ") = " << setprecision(3) << pi[i] << " ";
cout << endl << endl;
// print the transition probabilities
for(i = 0; i < numStates; i++) {
for(j = 0; j < numStates; j++)
cout << "a(" << i << "," << j << ") = " << setprecision(3) << a[i][j] << " ";
cout << endl;
}
// print the emission probabilities
cout << endl;
for(i = 0; i < numStates; i++) {
for(k = 0; k < sigmaSize; k++)
cout << "b(" << i << "," << k << ") = " << setprecision(3) << b[i][k] << " ";
cout << endl;
}
}
/*
* Converts a sequence of actual (real-valued) observations into the
* corresponding indices of this HMM's vocabulary by nearest-neighbor
* matching against the symbols in V. Any observation sequence passed
* to an HMM algorithm should be converted to indices first if it is
* not already in that form.
*/
int* HMM::convert(double** sequence, int T)
{
double* o = new double[dimensions];
int* new_o = new int[T];
double min_dist, dist;
int i, d, k;
bool found;
for(i = 0; i < T; i++) {
// extract a symbol consisting of actual values
for(d = 0; d < dimensions; d++) {
o[d] = sequence[i][d];
}
// match the symbol with its index
found = false;
min_dist = euclidDistance(o, V, dimensions, 0);
new_o[i] = 0;
for(k = 1; k < sigmaSize && !found; k++) {
dist = euclidDistance(o, V, dimensions, k);
if(dist < min_dist) {
min_dist = dist;
new_o[i] = k;
}
if(min_dist == 0)
found = true;
}
}
delete [] o; // clean up
return new_o;
}
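/*
 * Example (illustrative sketch): quantizing a raw multi-dimensional sequence
 * against the vocabulary V before running one of the HMM algorithms. This
 * assumes the model was built with the three-argument constructor (or loaded
 * with dimensions != -1), so that V and dimensions are set.
 *
 *   // sequence[t][d] holds the raw observation at time t, dimension d
 *   int* o = hmm.convert(sequence, T);
 *   double** res = hmm.viterbi(o, T); // freed as shown in the viterbi example
 *   delete [] o;
 */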
/*
* Calculates the Euclidean distance between two symbols.
*/
double euclidDistance(double* O, double** V, int dim, int k)
{
int i;
double sum=0;
for(i = 0; i < dim; i++)
sum = sum + (O[i] - V[k][i])*(O[i] - V[k][i]);
return sqrt(sum);
}
/*
* Loads an HMM from a file. A new HMM is created and the values read
* from the file are stored as its parameters. Returns the new HMM, or
* NULL if the file could not be opened.
*/
HMM* load(char* filename)
{
HMM* hmmptr = NULL;
int i, j, numStatesTemp = 0, sigmaSizeTemp = 0, dimensionsTemp = -1;
ifstream in(filename);
if(in.is_open()) {
// read the number of states, vocab size, and dimensions
in >> numStatesTemp;
in >> sigmaSizeTemp;
in >> dimensionsTemp;
// create a new HMM
if(dimensionsTemp == -1)
hmmptr = new HMM(numStatesTemp, sigmaSizeTemp);
else
hmmptr = new HMM(numStatesTemp, sigmaSizeTemp, dimensionsTemp);
// read the initial probabilities
for(i = 0; i < hmmptr->numStates; i++)
in >> hmmptr->pi[i];
// read the transition probabilities
for(i = 0; i < hmmptr->numStates; i++)
for(j = 0; j < hmmptr->numStates; j++)
in >> hmmptr->a[i][j];
// read the emission probabilities
for(i = 0; i < hmmptr->numStates; i++)
for(j = 0; j < hmmptr->sigmaSize; j++)
in >> hmmptr->b[i][j];
// read the output vocabulary if it was used
// (dimensions == -1 means that the vocab wasn't used)
if(hmmptr->dimensions != -1)
for(i = 0; i < hmmptr->sigmaSize; i++)
for(j = 0; j < hmmptr->dimensions; j++)
in >> hmmptr->V[i][j];
in.close(); // close file
}
return hmmptr;
}
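/*
 * Example (illustrative sketch): based on the read order above, a model file
 * is whitespace-separated text holding the three counts followed by pi, a,
 * b, and, when dimensions != -1, the vocabulary V. A 2-state, 3-symbol model
 * with no vocabulary might look like (the file name below is hypothetical):
 *
 *   2 3 -1
 *   0.6 0.4
 *   0.7 0.3
 *   0.4 0.6
 *   0.1 0.4 0.5
 *   0.7 0.2 0.1
 *
 *   HMM* hmm = load("model.txt");
 *   if(hmm != NULL) {
 *       hmm->print();
 *       delete hmm;
 *   }
 */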