hmm.cpp
#include "..\\include\\hmm.h"
#include <fstream.h>
#include <stdlib.h>
#include <iostream.h>
#include <iomanip.h>
#include <math.h>
/*
* Class constructor. Initializes an HMM given
* number of states and output vocabulary size.
*/
HMM::HMM(int numOfStates, int vocabSize)
{
int i;
// make sure that the number of states and size of
// vocab is at least one
if(numOfStates < 1)
numOfStates = 1;
if(vocabSize < 1)
vocabSize = 1;
numStates = numOfStates;
sigmaSize = vocabSize;
dimensions = -1;
pi = new double[numStates];
a = new double*[numStates];
for(i = 0; i < numStates; i++)
a[i] = new double[numStates];
b = new double*[numStates];
for(i = 0; i < numStates; i++)
b[i] = new double[sigmaSize];
}
/*
* Class constructor. Initializes an HMM given a
* number of states, output vocabulary size, and
* number of dimensions for its observation symbols.
*/
HMM::HMM(int numOfStates, int vocabSize, int d)
{
int i;
// make sure that the number of states, size of
// vocab, and number of dimensions are at least one
if(numOfStates < 1)
numOfStates = 1;
if(vocabSize < 1)
vocabSize = 1;
if(d < 1)
d = 1;
numStates = numOfStates;
sigmaSize = vocabSize;
dimensions = d;
pi = new double[numStates];
a = new double*[numStates];
for(i = 0; i < numStates; i++)
a[i] = new double[numStates];
b = new double*[numStates];
for(i = 0; i < numStates; i++)
b[i] = new double[sigmaSize];
V = new double*[sigmaSize];
for(i = 0; i < sigmaSize; i++)
V[i] = new double[d];
}
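/*
 * Note: the constructors above only allocate storage; pi, a, and b are left
 * uninitialized. An illustrative sketch (not part of the original API) of
 * seeding a freshly constructed model with uniform probabilities, assuming
 * the members are accessible from the caller as they are in load() below:
 *
 *   HMM hmm(numOfStates, vocabSize);
 *   for(int i = 0; i < numOfStates; i++) {
 *       hmm.pi[i] = 1.0 / numOfStates;
 *       for(int j = 0; j < numOfStates; j++)
 *           hmm.a[i][j] = 1.0 / numOfStates;
 *       for(int k = 0; k < vocabSize; k++)
 *           hmm.b[i][k] = 1.0 / vocabSize;
 *   }
 */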
/*
* Class destructor. Frees up the space we allocated on the
* heap for storing our member variables.
*/
HMM::~HMM()
{
int i;
// clean up
delete [] pi;
for(i = 0; i < numStates; i++) {
delete [] a[i];
delete [] b[i];
}
delete [] a;
delete [] b;
if(dimensions != -1) {
for(i = 0; i < sigmaSize; i++)
delete [] V[i];
delete [] V;
}
}
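/*
 * Note: because the class owns raw heap arrays and defines no copy
 * constructor or assignment operator here, copying an HMM by value would
 * make two objects delete the same arrays. A common guard (a suggestion,
 * not part of the original header) is to declare both private in hmm.h
 * without implementing them:
 *
 *   HMM(const HMM&);            // not implemented
 *   HMM& operator=(const HMM&); // not implemented
 */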
/*
* Calculation of Forward variables f(i,t) for state i at time
* t for sequence o with the current HMM parameters.
*/
double** HMM::forwardProc(int* o, int T)
{
double** fwd;
int i, j, t;
fwd = new double*[numStates];
for(i = 0; i < numStates; i++)
fwd[i] = new double[T];
// initialization (time 0)
for(i = 0; i < numStates; i++)
fwd[i][0] = pi[i] * b[i][o[0]];
// induction
for(t = 0; t <= T-2; t++) {
for(j = 0; j < numStates; j++) {
fwd[j][t+1] = 0;
for(i = 0; i < numStates; i++)
fwd[j][t+1] += (fwd[i][t] * a[i][j]);
fwd[j][t+1] *= b[j][o[t+1]];
}
}
return fwd;
}
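/*
 * Example (illustrative sketch, not part of the original API): the sequence
 * probability P(O | lambda) is the sum of the forward variables at the final
 * time step. Assumes an index sequence o of length T and that the members
 * are accessible as they are in load() below; the caller frees the returned
 * table. Note that the unscaled products can underflow for long sequences;
 * no scaling is applied in this implementation.
 *
 *   double** fwd = hmm.forwardProc(o, T);
 *   double prob = 0.0;
 *   for(int i = 0; i < hmm.numStates; i++)
 *       prob += fwd[i][T-1];
 *   for(int i = 0; i < hmm.numStates; i++)
 *       delete [] fwd[i];
 *   delete [] fwd;
 */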
/*
* Calculation of Backward variables bwd(i,t) for state i at time
* t for sequence o with the current HMM parameters.
*/
double** HMM::backwardProc(int* o, int T)
{
double** bwd;
int i, j, t;
bwd = new double*[numStates];
for(i = 0; i < numStates; i++)
bwd[i] = new double[T];
// initialization (time T-1)
for(i = 0; i < numStates; i++)
bwd[i][T-1] = 1;
// induction
for(t = T - 2; t >= 0; t--) {
for(i = 0; i < numStates; i++) {
bwd[i][t] = 0;
for (j = 0; j < numStates; j++)
bwd[i][t] += (bwd[j][t+1] * a[i][j] * b[j][o[t+1]]);
}
}
return bwd;
}
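/*
 * Example (illustrative sketch): combining the backward variables with the
 * initial and emission probabilities at time 0 gives the same sequence
 * probability as the forward procedure, which makes a handy consistency
 * check:
 *
 *   double** bwd = hmm.backwardProc(o, T);
 *   double prob = 0.0;
 *   for(int i = 0; i < hmm.numStates; i++)
 *       prob += hmm.pi[i] * hmm.b[i][o[0]] * bwd[i][0];
 */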
/*
 * Viterbi algorithm for hidden Markov models. Given an
 * observation sequence o, Viterbi finds the most likely
 * state sequence for o on this HMM, along with the weight
 * (negative log probability) of that sequence.
 */
double** HMM::viterbi(int* o, int T)
{
int i, j, t;
int min_state;
double min_weight, weight;
double stateOptProb = 0.0;
int* Q = new int[T];
int** sTable;    // back-pointer table, numStates x T
double** aTable; // accumulated negative log weights, numStates x T
double** answer; // answer[0][0] = best weight, answer[1][0..T-1] = best state sequence
sTable = new int*[numStates];
for(i = 0; i < numStates; i++)
sTable[i] = new int[T];
aTable = new double*[numStates];
for(i = 0; i < numStates; i++)
aTable[i] = new double[T];
answer = new double*[2];
answer[0] = new double[1];
answer[1] = new double[T];
// calculate accumulations and best states for time 0
for(i = 0; i < numStates; i++) {
aTable[i][0] = -1*log(pi[i]) - log(b[i][o[0]]);
sTable[i][0] = 0;
}
// fill up the rest of the tables
for(t = 1; t < T; t++) {
for(j = 0; j < numStates; j++) {
min_weight = aTable[0][t-1] - log(a[0][j]);
min_state = 0;
for(i = 1; i < numStates; i++) {
weight = aTable[i][t-1] - log(a[i][j]);
if(weight < min_weight) {
min_weight = weight;
min_state = i;
}
}
aTable[j][t] = min_weight - log(b[j][o[t]]);
sTable[j][t] = min_state;
}
}
// find minimum value for time T-1
min_weight = aTable[0][T-1];
min_state = 0;
for(i = 1; i < numStates; i++) {
if(aTable[i][T-1] < min_weight) {
min_weight = aTable[i][T-1];
min_state = i;
}
}
// trace back to find the optimized state sequence
Q[T-1] = min_state;
for(t = T-2; t >= 0; t--)
Q[t] = sTable[Q[t+1]][t+1];
// store the answers, free the work tables, and return
answer[0][0] = min_weight;
for(i = 0; i < T; i++)
answer[1][i] = Q[i];
for(i = 0; i < numStates; i++) {
delete [] sTable[i];
delete [] aTable[i];
}
delete [] sTable;
delete [] aTable;
delete [] Q;
return answer;
}
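/*
 * Example (illustrative sketch): reading the Viterbi result. answer[0][0]
 * holds the weight of the best path as a negative log probability (so the
 * probability of the best path is exp(-answer[0][0])), and answer[1][0..T-1]
 * holds the state indices stored as doubles. The caller owns the arrays.
 *
 *   double** res = hmm.viterbi(o, T);
 *   cout << "best path log probability: " << -res[0][0] << endl;
 *   for(int t = 0; t < T; t++)
 *       cout << (int)res[1][t] << " ";
 *   cout << endl;
 *   delete [] res[0];
 *   delete [] res[1];
 *   delete [] res;
 */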
/*
* Prints all the parameters of this HMM. Parameters include initial,
* transition, and output probabilities.
*/
void HMM::print()
{
int i, j, k;
// print the initial probabilities
for(i = 0; i < numStates; i++)
cout << "pi(" << i << ") = " << setprecision(3) << pi[i] << " ";
cout << endl << endl;
// print the transition probabilities
for(i = 0; i < numStates; i++) {
for(j = 0; j < numStates; j++)
cout << "a(" << i << "," << j << ") = " << setprecision(3) << a[i][j] << " ";
cout << endl;
}
// print the emission probabilities
cout << endl;
for(i = 0; i < numStates; i++) {
for(k = 0; k < sigmaSize; k++)
cout << "b(" << i << "," << k << ") = " << setprecision(3) << b[i][k] << " ";
cout << endl;
}
}
/*
* Converts a sequence of actual (real-valued) observations into the
* corresponding indices of this HMM's vocabulary by nearest-neighbor
* matching against the symbols in V. Any observation sequence passed
* to an HMM algorithm should be converted to indices first if it is
* not already in that form.
*/
int* HMM::convert(double** sequence, int T)
{
double* o = new double[dimensions];
int* new_o = new int[T];
double min_dist, dist;
int i, d, k;
bool found;
for(i = 0; i < T; i++) {
// extract a symbol consisting of actual values
for(d = 0; d < dimensions; d++) {
o[d] = sequence[i][d];
}
// match the symbol with its index
found = false;
min_dist = euclidDistance(o, V, dimensions, 0);
new_o[i] = 0;
for(k = 1; k < sigmaSize && !found; k++) {
dist = euclidDistance(o, V, dimensions, k);
if(dist < min_dist) {
min_dist = dist;
new_o[i] = k;
}
if(min_dist == 0)
found = true;
}
}
delete [] o; // clean up
return new_o;
}
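/*
 * Example (illustrative sketch): quantizing a raw multi-dimensional sequence
 * against the vocabulary V before running one of the HMM algorithms. This
 * assumes the model was built with the three-argument constructor (or loaded
 * with dimensions != -1), so that V and dimensions are set.
 *
 *   // sequence[t][d] holds the raw observation at time t, dimension d
 *   int* o = hmm.convert(sequence, T);
 *   double** res = hmm.viterbi(o, T); // freed as shown in the viterbi example
 *   delete [] o;
 */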
/*
* Calculates the Euclidean distance between two symbols.
*/
double euclidDistance(double* O, double** V, int dim, int k)
{
int i;
double sum=0;
for(i = 0; i < dim; i++)
sum = sum + (O[i] - V[k][i])*(O[i] - V[k][i]);
return sqrt(sum);
}
/*
* Loads an HMM from a file. A new HMM is created and the values read
* from the file are stored as its parameters. Returns the new HMM, or
* NULL if the file could not be opened.
*/
HMM* load(char* filename)
{
HMM* hmmptr = NULL;
int i, j, numStatesTemp = 0, sigmaSizeTemp = 0, dimensionsTemp = -1;
ifstream in(filename);
if(in.is_open()) {
// read the number of states, vocab size, and dimensions
in >> numStatesTemp;
in >> sigmaSizeTemp;
in >> dimensionsTemp;
// create a new HMM
if(dimensionsTemp == -1)
hmmptr = new HMM(numStatesTemp, sigmaSizeTemp);
else
hmmptr = new HMM(numStatesTemp, sigmaSizeTemp, dimensionsTemp);
// read the initial probabilities
for(i = 0; i < hmmptr->numStates; i++)
in >> hmmptr->pi[i];
// read the transition probabilities
for(i = 0; i < hmmptr->numStates; i++)
for(j = 0; j < hmmptr->numStates; j++)
in >> hmmptr->a[i][j];
// read the emission probabilities
for(i = 0; i < hmmptr->numStates; i++)
for(j = 0; j < hmmptr->sigmaSize; j++)
in >> hmmptr->b[i][j];
// read the output vocabulary if it was used
// (dimensions == -1 means that the vocab wasn't used)
if(hmmptr->dimensions != -1)
for(i = 0; i < hmmptr->sigmaSize; i++)
for(j = 0; j < hmmptr->dimensions; j++)
in >> hmmptr->V[i][j];
in.close(); // close file
}
return hmmptr;
}
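/*
 * Example (illustrative sketch): based on the read order above, a model file
 * is whitespace-separated text holding the three counts followed by pi, a,
 * b, and, when dimensions != -1, the vocabulary V. A 2-state, 3-symbol model
 * with no vocabulary might look like (the file name below is hypothetical):
 *
 *   2 3 -1
 *   0.6 0.4
 *   0.7 0.3
 *   0.4 0.6
 *   0.1 0.4 0.5
 *   0.7 0.2 0.1
 *
 *   HMM* hmm = load("model.txt");
 *   if(hmm != NULL) {
 *       hmm->print();
 *       delete hmm;
 *   }
 */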