📄 hmm.cpp
字号:
}
for (s=0; s < max_states; s++) // normalize so add to one
prob_states[s] = prob_states[s] / prob_sum;
}
/************************* FILE OPERATIONS ****************************/// set model parameters from a file.//void HMM::load_model(char* filename){ //read in param file FILE *param_file; double prob; param_file = fopen(filename,"r"); if (!param_file) { cerr << "ERROR: can not open " << filename << ".\n"; exit(-1); } if (fscanf(param_file,"states: %d\n",&max_states)!=1) { cerr << "ERROR: Problem reading states: field of " << filename << "\n"; exit(-1); } if (fscanf(param_file,"symbols: %d\n",&max_symbols)!=1 ) { cerr << "ERROR: Problem reading symbols: field of " << filename << "\n"; exit(-1); } // allocate space for model parameters alloc_model_matrices();
for (int i=0; i < max_states; i++)
{ fscanf(param_file,"%lf ",&prob);
States[i]->start = prob;
fscanf(param_file,"\n");
int j;
for (j=0; j < max_states; j++)
{
fscanf(param_file,"%lf ",&prob);
States[i]->next_trans[j] = prob;
}
fscanf(param_file,"\n");
for (j=0; j < max_symbols; j++)
{ fscanf(param_file,"%lf ",&prob); States[i]->recur_out[j] = prob; } fscanf(param_file,"\n"); } fclose(param_file);}// figure out number of symbols and number of strings in file// and load strings matrix and string_len array.//读取 观察值 序列 一般来说是多个int HMM::load_string_matrix(char* filename){ int str,sym,tmp,val; int symbol_count; // open file FILE *from; char line[MAX_LINE]; from = fopen(filename,"r"); symbol_count=0;
for (num_strings=0; fscanf(from,"%[^\n]\n",line)!=EOF; num_strings++) {
for (sym=0,tmp=99; tmp>1; sym++) {
tmp=sscanf(line,"%d %[0-9 ]",&val,line);
}
if (symbol_count < sym) symbol_count=sym; } cout << "\nmax sequence length = " << symbol_count << "\n"; cout << "number of strings = " << num_strings << "\n"; rewind(from); // allocate matrix strings = new int* [num_strings]; string_len = new int [num_strings]; for (int i=0; i<num_strings; i++) { strings[i]=new int [symbol_count]; for (sym=0; sym<symbol_count; sym++) // initialize array to -1 strings[i][sym]=-1; } // load values into matrix diff_len=FALSE; int tmp_max_symbols=0; for (str=0; fscanf(from,"%[^\n]\n",line)!=EOF; str++) { for (sym=0,tmp=99; tmp>1; sym++) { tmp=sscanf(line,"%d %[0-9 ]",&val,line); strings[str][sym]=val; if (tmp_max_symbols<val+1) tmp_max_symbols=val+1; } string_len[str]=sym; if (sym!=symbol_count) diff_len=TRUE; } if (diff_len == TRUE) cout << "\nStrings of different lengths\n"; // change max_symbols based on length of file if (tmp_max_symbols > max_symbols) { cout << "\nsetting max_symbols from file to "<<tmp_max_symbols<<"\n"; max_symbols=tmp_max_symbols; } fclose(from); return symbol_count;}/************************* CALCULATIONS ****************************/// rescale alpha values (from Rabiner).// void HMM::rescale_alphas(int col){ scaling_factors[col] = 0.0; int i; for (i=0; i < max_states; i++) { scaling_factors[col] += alpha[col][i]; } // rescale all the alpha's and smooth them for (i=0; i < max_states; i++) { alpha[col][i] /= scaling_factors[col]; }}// rescale beta values after rescaling alphas.//void HMM::rescale_betas(int col){ // rescale all the beta's w alpha's factors for (int i=0; i < max_states; i++) { beta[col][i] /= scaling_factors[col]; }}// calculate alpha trellis and return final alpha value (recognition prob).// 前向算法double HMM::alpha_F(int* symbol_array, int symbol_count){ double accum =0; int i,j,k; symbol_count=(symbol_count<0?trellis_width-1:symbol_count); // clear the trellis and set the first column 这很重要 for (i=0; i < trellis_width; i++) for (j=0; j < max_states; j++)
{ alpha[i][j] = 0.0; }
//初始化 准备递归
for(i =0; i<max_states; i++)
alpha[0][i] = States[i]->start*States[i]->recur_out[symbol_array[0]];
// 开始前向算法的 递归过程了 激动人心的时刻啊 for (k = 1; k < symbol_count; k++)
{ for (j=0; j < max_states; j++)
for(i=0; i<max_states; i++)
{
alpha[k][j] += alpha[k-1][i]*States[i]->next_trans[j]*States[j]->recur_out[symbol_array[k]];
}
}
for(i =0 ;i<max_states ; i++)
accum +=alpha[symbol_count -1][i]; return accum;}// calculate beta trellis and return initial beta value,// instead of going bottom to top, go top to bottom; the bottom// is the exception this time. we look at transistions from current// state (single) to the next states (plural).//后向算法double HMM::beta_I(int* symbol_array, int symbol_count){ double accum =0;
int i,j,k; symbol_count=(symbol_count<0?trellis_width-1:symbol_count); // clear the trellis and set the last column for (i=0; i < trellis_width; i++) for (j=0; j < max_states; j++)
{ beta[i][j] = 0.0; }
for(i =0; i<max_states; i++)
beta[symbol_count-1][i] = 1;
// begin setting all the cols except last one for (k = symbol_count-2; k >= 0; k--)
{
for (i=0; i < max_states; i++)
for(j=0; j<max_states; j++)
{
beta[k][i] +=States[i]->next_trans[j]*States[j]->recur_out[symbol_array[k+1]]*beta[k+1][j];
}
}
for(i =0 ;i<max_states ; i++)
accum +=beta[0][i]; return accum; }// compute gamma values and fill gamma tables for entire trellis.//void HMM::compute_gamma(int* symbol_array, int symbol_count){ symbol_count=(symbol_count<0?trellis_width-1:symbol_count); // clear gammas
int i,j,t; for (i=0; i < symbol_count; i++) for (j=0; j < max_states; j++)
{ gamma[i][j] = 0.0; } // calc the gamma table
double temp; for (t = 0; t < symbol_count; t++)
{
temp =0;
for (j=0; j< max_states; j++)
temp +=alpha[t][j]*beta[t][j];
for (i=0;i < max_states; i++)
gamma[t][i] = alpha[t][i]*beta[t][i]/temp;
}
}
void HMM::compute_epsum(int* symbol_array, int symbol_count)
{
symbol_count=(symbol_count<0?trellis_width-1:symbol_count);
// clear epsum
int i,j,t;
for(t=0; t<trellis_width; t++)
for (i=0; i < max_states; i++)
for (j=0; j < max_states; j++)
{
epsum[t][i][j] = 0.0;
}
// 计算序列的概率
double temp =0;
for(i =0; i<max_states; i++)
temp+=alpha[symbol_count][i];
//计算
for (t = 0; t < symbol_count-1; t++)
for(i=0; i<max_states; i++)
for(j=0; j<max_states; j++)
epsum[t][i][j]=alpha[t][i]* beta[t+1][j]*States[i]->next_trans[j]*States[j]->recur_out[symbol_array[t+1]];
}// set ab count from one string.//double HMM::set_ab_counts(int* symbol_array, int symbol_count){ double alpha_tot; // fill the alpha matrix alpha_tot = alpha_F(symbol_array,symbol_count); // fill the beta matrix beta_I(symbol_array,symbol_count); // fill the gamma_next and gamma_recur matrices compute_gamma(symbol_array,symbol_count);
compute_epsum(symbol_array,symbol_count); return(alpha_tot);}// reestimate parameters after calculating ab_counts//返回值是整个参数空间的不一致的总合
//在估计参数时 人为的进行了不为零的设置,就是说起始概率,转移概率和发射概率都不为零double HMM::reestimate(){ double prob,temp, diff_sum = 0.0;
int i,j;
//估计pi也就是起始概率
temp=0;
for(i=0; i<max_states; i++)
{
if(sum_gamma[0][i] <MIN_PROB)
sum_gamma[0][i] =MIN_PROB;
temp +=sum_gamma[0][i];
}
for(i=0; i<max_states; i++)
{
prob = States[i]->start;
States[i]->start =sum_gamma[0][i]/temp;
diff_sum += fabs(prob -States[i]->start);
}
//估计aij
for(i =0; i<max_states; i++)
{
temp =0;
for(j=0; j<max_states; j++)
{
temp+=sum_aij[i][j];
}
for(j=0; j<max_states; j++)
{
prob =States[i]->next_trans[j];
States[i]->set_next_trans(j, sum_aij[i][j]/temp);
diff_sum += fabs(prob -States[i]->next_trans[j]);
}
}
//估计发射概率
for(i =0; i<max_states; i++)
{
temp =0;
for(j=0; j<max_symbols; j++)
{
temp+=sum_b[i][j];
}
for(j=0; j<max_symbols; j++)
{
prob =States[i]->recur_out[j];
States[i]->set_recur_out(j, sum_b[i][j]/temp);
diff_sum += fabs(prob -States[i]->recur_out[j]);
}
}
return diff_sum;}// test model for a single string.//double HMM::test(int* string, int symbol_count){ double log_alpha; symbol_count=(symbol_count<0?trellis_width-1:symbol_count); // fill alpha trellis and scaling coeffs log_alpha=log(alpha_F(string,symbol_count)); // calc log probability from coeffs if (scaling_factors[0]>0.0) { // if scaling_factors set log_alpha=0.0; for (int t=0; t < symbol_count+1; t++) log_alpha += log(scaling_factors[t]); } return log_alpha;}/************************* STATE DECLARATIONS ****************************/// create state object.//STATE::STATE(int num_state,int num_symbols){
recur_out = new double [num_symbols];
next_trans = new double [num_state];
start =(double)1/num_state; //初始化认为所有状态起始的概率相同
max_states = num_state; //add by fanshixi
max_symbols = num_symbols;
}// set/return the probability of generating a particluar symbol // during a recurrent transition. (smooth with MIN_PROB)//double STATE::set_recur_out(int symbol, double prob){ if (prob != -1.0) { recur_out[symbol] = prob; } if (recur_out[symbol] < MIN_PROB) recur_out[symbol] = MIN_PROB; return recur_out[symbol];}// set/return the probability of making transitioning to the next state.// (smooth with MIN_PROB)//double STATE::set_next_trans(int state,double prob){ if (prob != -1.0)
{ next_trans[state] = prob; } if (next_trans[state] < MIN_PROB) next_trans[state] = MIN_PROB;
return next_trans[state];}//out是返回的状态序列 调用函数的时候需要new好
double HMM::viterbi(int *symbol_array, int symbol_count, int **out)
{
double accum =0;
int i,j,k;
symbol_count=(symbol_count<0?trellis_width-1:symbol_count);
double** delta; //[t][i]时间为t时,当前状态为i的最大可能概率
int** preseq; //[t][i] 时间为t时,当前状态i取到最大概率时,前一个状态的值
double seqprob_max;
int state_max;
delta= new double *[symbol_count];
preseq =new int *[symbol_count];
for (k=0; k < symbol_count; k++)
{
delta[k] = new double [max_states];
preseq[k] = new int [max_states];
}
for (i =0; i<max_states; i++)
{
delta[0][i] = States[i]->start *States[i]->recur_out[symbol_array[0]];
}
double temp;
for (k=1; k < symbol_count; k++)
{
for(j=0; j<max_states; j++)
{
seqprob_max = -1;
state_max =-1;
for(i=0; i<max_states; i++)
{
temp=delta[k-1][i]*States[i]->next_trans[j]*States[j]->recur_out[symbol_array[k]];
if(temp > seqprob_max)
{
seqprob_max = temp;
state_max = i;
}
}
delta[k][j] = seqprob_max;
preseq[k][j] = state_max;
}
}
//开始获取 最佳状态序列了
seqprob_max = -1;
state_max =-1;
for (i=0; i < max_states; i++) //先得到最后一个最大的状态
{
if(delta[symbol_count-1][i] > seqprob_max)
{
seqprob_max = delta[symbol_count-1][i];
state_max = i;
}
}
int *tem;
tem=*out;
tem[symbol_count-1] = state_max;
for (k=symbol_count-1; k > 0; k--)
tem[k-1] = preseq[k][tem[k]];
return seqprob_max; //最大状态序列的最大概率
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -