📄 hmm.cc
字号:
}/************************* FILE OPERATIONS ****************************/// set model parameters from a file.//void HMM::load_model(char* filename){ //read in param file FILE *param_file; double prob; param_file = fopen(filename,"r"); if (!param_file) { cerr << "ERROR: can not open " << filename << ".\n"; exit(-1); } if (fscanf(param_file,"states: %d\n",&max_states)!=1) { cerr << "ERROR: Problem reading states: field of " << filename << "\n"; exit(-1); } if (fscanf(param_file,"symbols: %d\n",&max_symbols)!=1 ) { cerr << "ERROR: Problem reading symbols: field of " << filename << "\n"; exit(-1); } // allocate space for model parameters alloc_model_matrices(); for (int i=0; i < max_states; i++) { fscanf(param_file,"%lf ",&prob); States[i]->set_recur_trans(prob); for (int j=0; j < max_symbols; j++) { fscanf(param_file,"%lf ",&prob); States[i]->set_recur_out(j,prob); } fscanf(param_file,"\n"); fscanf(param_file,"%lf ",&prob); States[i]->set_next_trans(prob); for (j=0; j < max_symbols; j++) { fscanf(param_file,"%lf ",&prob); States[i]->set_next_out(j,prob); } fscanf(param_file,"\n"); } fclose(param_file);}// figure out number of symbols and number of strings in file// and load strings matrix and string_len array.//int HMM::load_string_matrix(char* filename){ int str,sym,tmp,val; int symbol_count; // open file FILE *from; char line[MAX_LINE]; from = fopen(filename,"r"); symbol_count=0; for (num_strings=0; fscanf(from,"%[^\n]\n",line)!=EOF; num_strings++) { for (sym=0,tmp=99; tmp>1; sym++) { tmp=sscanf(line,"%d %[0-9 ]",&val,line); } if (symbol_count < sym) symbol_count=sym; } cout << "\nmax sequence length = " << symbol_count << "\n"; cout << "number of strings = " << num_strings << "\n"; rewind(from); // allocate matrix strings = new int* [num_strings]; string_len = new int [num_strings]; for (int i=0; i<num_strings; i++) { strings[i]=new int [symbol_count]; for (sym=0; sym<symbol_count; sym++) // initialize array to -1 strings[i][sym]=-1; } // load values into matrix diff_len=FALSE; int tmp_max_symbols=0; for (str=0; fscanf(from,"%[^\n]\n",line)!=EOF; str++) { for (sym=0,tmp=99; tmp>1; sym++) { tmp=sscanf(line,"%d %[0-9 ]",&val,line); strings[str][sym]=val; if (tmp_max_symbols<val+1) tmp_max_symbols=val+1; } string_len[str]=sym; if (sym!=symbol_count) diff_len=TRUE; } if (diff_len == TRUE) cout << "\nStrings of different lengths\n"; // change max_symbols based on length of file if (tmp_max_symbols > max_symbols) { cout << "\nsetting max_symbols from file to "<<tmp_max_symbols<<"\n"; max_symbols=tmp_max_symbols; } fclose(from); return symbol_count;}/************************* CALCULATIONS ****************************/// rescale alpha values (from Rabiner).// void HMM::rescale_alphas(int col){ scaling_factors[col] = 0.0; for (int i=0; i < max_states; i++) { scaling_factors[col] += alpha[col][i]; } // rescale all the alpha's and smooth them for (i=0; i < max_states; i++) { alpha[col][i] /= scaling_factors[col]; }}// rescale beta values after rescaling alphas.//void HMM::rescale_betas(int col){ // rescale all the beta's w alpha's factors for (int i=0; i < max_states; i++) { beta[col][i] /= scaling_factors[col]; }}// calculate alpha trellis and return final alpha value (recognition prob).// double HMM::alpha_F(int* symbol_array, int symbol_count){ double accum; int i,j; symbol_count=(symbol_count<0?trellis_width-1:symbol_count); // clear the trellis and set the first column for (i=0; i < trellis_width; i++) for (j=0; j < max_states; j++) { alpha[i][j] = 0.0; } alpha[0][0] = 1.0; // first state starts with alpha = 1.0 rescale_alphas(0); // first col rescale is div by one // calculate the alphas for rest of cols for (i = 0; i < symbol_count; i++) { for (j=max_states-1; j > 0; j--) { accum = alpha[i][j] * States[j]->set_recur_trans() * States[j]->set_recur_out(symbol_array[i]); alpha[i+1][j] = alpha[i][j-1] * States[j-1]->set_next_trans() * States[j-1]->set_next_out(symbol_array[i]) + accum; } alpha[i+1][0] = alpha[i][0] * States[0]->set_recur_trans() * States[0]->set_recur_out(symbol_array[i]); rescale_alphas(i+1); } return (alpha[symbol_count][max_states-1]);}// calculate beta trellis and return initial beta value,// instead of going bottom to top, go top to bottom; the bottom// is the exception this time. we look at transistions from current// state (single) to the next states (plural).//double HMM::beta_I(int* symbol_array, int symbol_count) { double accum; symbol_count=(symbol_count<0?trellis_width-1:symbol_count); // clear the trellis and set the last column for (int i=0; i < trellis_width; i++) for (int j=0; j < max_states; j++) { beta[i][j] = 0.0; } beta[symbol_count][max_states-1] = 1.0; /* final beta is 1.0 */ rescale_betas(symbol_count); // this is really div by 1 so no effect // begin setting all the cols except last one for (int t = symbol_count-1; t >= 0; t--) { for (int j=0; j < max_states-1; j++) { accum = beta[t+1][j] * States[j]->set_recur_trans() * States[j]->set_recur_out(symbol_array[t]); beta[t][j] = beta[t+1][j+1] * States[j]->set_next_trans() * States[j]->set_next_out(symbol_array[t]) + accum; } beta[t][max_states-1] = beta[t+1][max_states-1] * States[max_states-1]->set_recur_trans() * States[max_states-1]->set_recur_out(symbol_array[t]); rescale_betas(t); } return (beta[0][0]); }// compute gamma values and fill gamma tables for entire trellis.//void HMM::compute_gamma(int* symbol_array, int symbol_count){ symbol_count=(symbol_count<0?trellis_width-1:symbol_count); // clear gammas for (int i=0; i < symbol_count; i++) for (int j=0; j < max_states; j++) { gamma_recur[i][j] = 0.0; gamma_next[i][j] = 0.0; } // calc the gamma table for (int t = 0; t < symbol_count; t++) { for (int i=0,j=1; i < max_states-1; i++,j++) { gamma_next[t][i] = (alpha[t][i]* States[i]->set_next_trans() * States[i]->set_next_out(symbol_array[t]) * beta[t+1][j]) / alpha[symbol_count][max_states-1]; gamma_recur[t][i] = (alpha[t][i]* States[i]->set_recur_trans() * States[i]->set_recur_out(symbol_array[t]) * beta[t+1][i]) / alpha[symbol_count][max_states-1]; } gamma_recur[t][max_states-1] = (alpha[t][max_states-1] * States[max_states-1]->set_recur_trans() * States[max_states-1]->set_recur_out(symbol_array[t]) * beta[t+1][max_states-1]) / alpha[symbol_count][max_states-1]; gamma_next[t][max_states-1] = 0.0; }}// this is the numerator of the a_ij reestimate and denom of b_ij reestimate,// this func assumes gammas have been calc'd.//double HMM::a_numer(int i, int j, int symbol_count){ double sum = 0.0; symbol_count=(symbol_count<0?trellis_width-1:symbol_count); for(int t=0; t < symbol_count; t++) { if (i==j) { sum += gamma_recur[t][i]; } else if ( i < max_states-1 ) { sum += gamma_next[t][i]; } else { cerr << "WARNING: gamma_next[t]["<<i<<"] shouldn't be requested\n"; break; } } return sum;}// this is the denominator of the a_ij reestimate.//double HMM::a_denom(int i, int j, int symbol_count){ symbol_count=(symbol_count<0?trellis_width-1:symbol_count); // j not used in this func - pass for consistency double sum = 0.0; for(int t=0; t < symbol_count; t++) if (i<max_states-1) sum += gamma_recur[t][i] + gamma_next[t][i]; else sum += gamma_recur[t][i]; return sum;}// this is the numerator of the b_ij reestimate.// double HMM::b_numer(int i, int j, int sym, int *symbol_array, int symbol_count){ symbol_count=(symbol_count<0?trellis_width-1:symbol_count); double sum = 0.0; for(int t=0; t < symbol_count; t++) { if ( symbol_array[t] == sym ) { if (i==j) sum += gamma_recur[t][i]; else { if (i < max_states-1) sum += gamma_next[t][i]; else { cerr << "WARNING: gamma_next[t]["<<i<<"] shouldn't be requested\n"; break; } } } } return sum;}// set cumulative ab counts from an array of equal length strings.//double HMM::set_cumulative_ab_counts(){ int *symbol_array; double alpha_tot; // dynamic allocation of arrays (ANSI) symbol_array = new int [trellis_width-1]; // clear cumulative sum arrays for(int i=0; i < max_states; i++) { a_numer_sum_recur[i] = 0.0; a_denom_sum_recur[i] = 0.0; a_numer_sum_next[i] = 0.0; a_denom_sum_next[i] = 0.0; for (int j=0; j < max_symbols; j++) { b_numer_sum_recur[i][j] = 0.0; b_numer_sum_next[i][j] = 0.0; } } alpha_tot=0.0; // loop all the strings calc a,b sums for(int s=0; s < num_strings; s++) { // set the symbol array to string from matrix for (int i=0; i < trellis_width-1; i++) { symbol_array[i] = strings[s][i]; } // fill the alpha matrix alpha_tot += alpha_F(symbol_array); // fill the beta matrix beta_I(symbol_array); // fill the gamma_next and gamma_recur matrices compute_gamma(symbol_array); int j; for (i=0,j=1; j < max_states; i++,j++) { a_numer_sum_recur[i] += a_numer(i,i); a_numer_sum_next[i] += a_numer(i,j); a_denom_sum_recur[i] += a_denom(i,i); a_denom_sum_next[i] += a_denom(i,j); for (int k=0; k < max_symbols; k++) { b_numer_sum_recur[i][k] += b_numer(i,i,k,symbol_array); b_numer_sum_next[i][k] += b_numer(i,j,k,symbol_array); } } // compute recurrent probs for last state i = max_states-1; a_numer_sum_recur[i] += a_numer(i,i); a_denom_sum_recur[i] += a_denom(i,i); for (int k=0; k < max_symbols; k++) b_numer_sum_recur[i][k] += b_numer(i,i,k,symbol_array); } delete [] symbol_array; return(alpha_tot);}// set ab count from one string.//double HMM::set_ab_counts(int* symbol_array, int symbol_count){ double alpha_tot; int i,j; // fill the alpha matrix alpha_tot = alpha_F(symbol_array,symbol_count); // fill the beta matrix beta_I(symbol_array,symbol_count); // fill the gamma_next and gamma_recur matrices compute_gamma(symbol_array,symbol_count); for (i=0,j=1; j < max_states; i++,j++) { a_numer_sum_recur[i] = a_numer(i,i,symbol_count); a_numer_sum_next[i] = a_numer(i,j,symbol_count); a_denom_sum_recur[i] = a_denom(i,i,symbol_count); a_denom_sum_next[i] = a_denom(i,j,symbol_count); for (int k=0; k < max_symbols; k++) { b_numer_sum_recur[i][k] = b_numer(i,i,k,symbol_array,symbol_count); b_numer_sum_next[i][k] = b_numer(i,j,k,symbol_array,symbol_count); } } // compute recurrent probs for last state i = max_states-1; a_numer_sum_recur[i] = a_numer(i,i,symbol_count); a_denom_sum_recur[i] = a_denom(i,i,symbol_count); for (int k=0; k < max_symbols; k++) b_numer_sum_recur[i][k] = b_numer(i,i,k,symbol_array,symbol_count); // delete [] symbol_array; return(alpha_tot);}// reestimate parameters after calculating ab_counts//double HMM::reestimate(){ double prob, diff_sum = 0.0; // loop all the transition probs for (int i=0, j=1; j < max_states; i++, j++) { // do all the prob transitions except last one prob = a_numer_sum_recur[i] / a_denom_sum_recur[i]; diff_sum += fabs(prob - States[i]->set_recur_trans()); States[i]->set_recur_trans(prob); for (int k=0; k < max_symbols; k++) { prob = b_numer_sum_recur[i][k] / a_numer_sum_recur[i]; diff_sum += fabs(prob - States[i]->set_recur_out(k)); States[i]->set_recur_out(k,prob); } // do all the next transitions prob = a_numer_sum_next[i] / a_denom_sum_next[i]; diff_sum += fabs(prob - States[i]->set_next_trans()); States[i]->set_next_trans(prob); for (k=0; k < max_symbols; k++) { prob = b_numer_sum_next[i][k] / a_numer_sum_next[i]; diff_sum += fabs(prob - States[i]->set_next_out(k)); States[i]->set_next_out(k,prob); } } // calc the recurrent prob for last state i = max_states-1; prob = a_numer_sum_recur[i] / a_denom_sum_recur[i]; diff_sum += fabs(prob - States[i]->set_recur_trans()); States[i]->set_recur_trans(prob); for (int k=0; k < max_symbols; k++) { prob = b_numer_sum_recur[i][k] / a_numer_sum_recur[i]; diff_sum += fabs(prob - States[i]->set_recur_out(k)); States[i]->set_recur_out(k,prob); } return diff_sum;}// test model for a single string.//double HMM::test(int* string, int symbol_count){ double log_alpha; symbol_count=(symbol_count<0?trellis_width-1:symbol_count); // fill alpha trellis and scaling coeffs log_alpha=log(alpha_F(string,symbol_count)); // calc log probability from coeffs if (scaling_factors[0]>0.0) { // if scaling_factors set log_alpha=0.0; for (int t=0; t < symbol_count+1; t++) log_alpha += log(scaling_factors[t]); } return log_alpha;}/************************* STATE DECLARATIONS ****************************/// create state object.//STATE::STATE(int num_symbols){ recur_out = new double [num_symbols]; next_out = new double [num_symbols];}// set/return the probability of generating a particluar symbol // during a recurrent transition. (smooth with MIN_PROB)//double STATE::set_recur_out(int symbol, double prob){ if (prob != -1.0) { recur_out[symbol] = prob; } if (recur_out[symbol] < MIN_PROB) recur_out[symbol] = MIN_PROB; return recur_out[symbol];}// set/return the probability of generating a particluar symbol // during a transition to the next state. (smooth with MIN_PROB)//double STATE::set_next_out(int symbol, double prob){ if (prob != -1.0) { next_out[symbol] = prob; } if (next_out[symbol] < MIN_PROB) next_out[symbol] = MIN_PROB; return next_out[symbol];}// set/return the probability of making a recurrent transition.// (smooth with MIN_PROB)//double STATE::set_recur_trans(double prob){ if (prob != -1.0) { recur_trans = prob; } if (recur_trans < MIN_PROB) recur_trans = MIN_PROB; return recur_trans;}// set/return the probability of making transitioning to the next state.// (smooth with MIN_PROB)//double STATE::set_next_trans(double prob){ if (prob != -1.0) { next_trans = prob; } if (next_trans < MIN_PROB) next_trans = MIN_PROB; return next_trans;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -