📄 training_algorithms_multialpha.c
字号:
/* simulated annealing varialbles */ double temperature; double cooling_factor; int annealing_status; /* help variables for add_to_E */ int alphabet_nr; int alphabet; int a_size; double *E_cur; double *subst_mtx; struct msa_letter_s *msa_seq; /* remember start of sequence pointer */ msa_seq_infop_start = msa_seq_infop; old_log_likelihood_lab = 9999.0; new_log_likelihood_lab = 9999.0; old_log_likelihood_ulab = 9999.0; new_log_likelihood_ulab = 9999.0; max_nr_iterations = 70; iteration = 1; if(annealing == YES) { temperature = INIT_TEMP; cooling_factor = INIT_COOL; annealing_status = ACTIVE; } else { annealing_status = DONE; } do {#ifdef DEBUG_BW2 printf("starting baum-welch loop\n");#endif /* allocate per iteration matrices */ T = (double*)(malloc_or_die(hmmp->nr_v * hmmp->nr_v * sizeof(double))); E = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size * sizeof(double))); T_lab = (double*)(malloc_or_die(hmmp->nr_v * hmmp->nr_v * sizeof(double))); E_lab = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size * sizeof(double))); T_ulab = (double*)(malloc_or_die(hmmp->nr_v * hmmp->nr_v * sizeof(double))); E_ulab = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size * sizeof(double))); if(hmmp->nr_alphabets > 1) { E_2 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_2 * sizeof(double))); E_lab_2 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_2 * sizeof(double))); E_ulab_2 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_2 * sizeof(double))); } if(hmmp->nr_alphabets > 2) { E_3 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_3 * sizeof(double))); E_lab_3 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_3 * sizeof(double))); E_ulab_3 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_3 * sizeof(double))); } if(hmmp->nr_alphabets > 3) { E_4 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_4 * sizeof(double))); E_lab_4 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_4 * sizeof(double))); E_ulab_4 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_4 * sizeof(double))); } old_log_likelihood_ulab = new_log_likelihood_ulab; new_log_likelihood_ulab = 0.0; old_log_likelihood_lab = new_log_likelihood_lab; new_log_likelihood_lab = 0.0; /* reset sequence pointer */ msa_seq_infop = msa_seq_infop_start; for(s = 0; s < nr_seqs; s++) { if(use_lead_columns == YES) { seq_len = msa_seq_infop->nr_lead_columns; } else { seq_len = msa_seq_infop->msa_seq_length; } /* calculate for unlabeled sequences */ /* calculate forward and backward matrices * memory for forw_mtx, scale_mtx and * backw_mtx is allocated in the functions */#ifdef DEBUG_BW2 printf("running forward unlabeled\n");#endif msa_forward_multi(hmmp, msa_seq_infop, use_lead_columns, use_gap_shares, NO, &forw_mtx, &forw_scale, NO, normalize, scoring_method, multi_scoring_method, aa_freqs, aa_freqs_2, aa_freqs_3, aa_freqs_4);#ifdef DEBUG_BW2 printf("running backward unlabeled\n");#endif msa_backward_multi(hmmp, msa_seq_infop, use_lead_columns, use_gap_shares, &backw_mtx, forw_scale, NO, normalize, scoring_method, multi_scoring_method, aa_freqs, aa_freqs_2, aa_freqs_3, aa_freqs_4);#ifdef DEBUG_BW2 printf("done with backward unlabeled\n");#endif /* update new_log_likelihood */ likelihood = log10((forw_mtx + get_mtx_index(seq_len+1, hmmp->nr_v-1, hmmp->nr_v))->prob); for(k = 0; k <= seq_len; k++) { likelihood = likelihood + log10(*(forw_scale + k)); }#ifdef DEBUG_BW dump_scaling_array(k-1,forw_scale); printf("likelihood = %f\n", likelihood);#endif new_log_likelihood_ulab += likelihood; for(k = 0; k < hmmp->nr_v-1; k++) /* k = from vertex */ { lp = *(hmmp->to_trans_array + k); while(lp->vertex != END) /* l = to-vertex */ { i = 0; while(1) { if(use_lead_columns == NO) { p = i; } else { p = *(msa_seq_infop->lead_columns_start + i); } add_Tkl_contribution_msa_multi(hmmp, msa_seq_infop, forw_mtx, backw_mtx, forw_scale, p, k, lp, T_ulab, use_gap_shares, use_lead_columns, i, NO, scoring_method, normalize, multi_scoring_method, aa_freqs, aa_freqs_2, aa_freqs_3, aa_freqs_4); i++; if(use_lead_columns == NO) { if(i >= seq_len) { break; } } else { if(*(msa_seq_infop->lead_columns_start + i) == END) { break; } } } /* move on to next path */ while(lp->next != NULL) { lp++; } lp++; } /* calculate E[k][a] contribution from this sequence */ if(silent_state_multi(k, hmmp) != 0) { i = 0; while(1) { /* get correct incex for this letter column */ if(use_lead_columns == NO) { p = i; } else { p = *(msa_seq_infop->lead_columns_start + i); } /* get basic scoring result */ Eka_base = add_Eka_contribution_msa_multi(hmmp, msa_seq_infop, forw_mtx, backw_mtx, p, k, i, use_lead_columns); /* loop over the alphabets */ for(alphabet_nr = 1; alphabet_nr <= hmmp->nr_alphabets; alphabet_nr++) { if(alphabet_nr == 1) { alphabet = hmmp->alphabet; subst_mtx = hmmp->subst_mtx; a_size = hmmp->a_size; E_cur = E_ulab; msa_seq = msa_seq_infop->msa_seq_1; } else if(alphabet_nr == 2) { alphabet = hmmp->alphabet_2; subst_mtx = hmmp->subst_mtx_2; a_size = hmmp->a_size_2; E_cur = E_ulab_2; msa_seq = msa_seq_infop->msa_seq_2; } else if(alphabet_nr == 3) { alphabet = hmmp->alphabet_3; subst_mtx = hmmp->subst_mtx_3; a_size = hmmp->a_size_3; E_cur = E_ulab_3; msa_seq = msa_seq_infop->msa_seq_3; } else if(alphabet_nr == 4) { alphabet = hmmp->alphabet_4; subst_mtx = hmmp->subst_mtx_4; a_size = hmmp->a_size_4; E_cur = E_ulab_4; msa_seq = msa_seq_infop->msa_seq_4; } /* get result and add to matrix according to scoring method */ add_to_E_multi(E_cur, Eka_base, msa_seq, p, k, a_size, normalize, subst_mtx, alphabet, scoring_method, use_nr_occ, DISCRETE, NULL); } /* update loop index, check if we are done */ i++; if(use_lead_columns == NO) { if(i >= seq_len) { break; } } else { if(*(msa_seq_infop->lead_columns_start + i) == END) { break; } } } } } #ifdef DEBUG_BW dump_T_matrix(hmmp->nr_v, hmmp->nr_v, T_ulab); dump_E_matrix(hmmp->nr_v, hmmp->a_size, E_ulab);#endif /* some garbage collection */ free(forw_mtx); free(backw_mtx); free(forw_scale); /* calculate for labeled seqs */ /* calculate forward and backward matrices * memory for forw_mtx, scale_mtx and * backw_mtx is allocated in the functions */#ifdef DEBUG_BW2 printf("running forward labeled\n");#endif msa_forward_multi(hmmp, msa_seq_infop, use_lead_columns, use_gap_shares, NO, &forw_mtx, &forw_scale, YES, normalize, scoring_method, multi_scoring_method, aa_freqs, aa_freqs_2, aa_freqs_3, aa_freqs_4);#ifdef DEBUG_BW2 printf("running backward labeled\n");#endif msa_backward_multi(hmmp, msa_seq_infop, use_lead_columns, use_gap_shares, &backw_mtx, forw_scale, YES, normalize, scoring_method, multi_scoring_method, aa_freqs, aa_freqs_2, aa_freqs_3, aa_freqs_4);#ifdef DEBUG_BW2 printf("done with backward labeled\n");#endif /* update new_log_likelihood */ likelihood = log10((forw_mtx + get_mtx_index(seq_len+1, hmmp->nr_v-1, hmmp->nr_v))->prob); for(k = 0; k <= seq_len; k++) { likelihood = likelihood + log10(*(forw_scale + k)); }#ifdef DEBUG_BW dump_scaling_array(k-1,forw_scale); printf("likelihood = %f\n", likelihood);#endif new_log_likelihood_lab += likelihood; for(k = 0; k < hmmp->nr_v-1; k++) /* k = from vertex */ { lp = *(hmmp->to_trans_array + k); while(lp->vertex != END) /* l = to-vertex */ { i = 0; while(1) { if(use_lead_columns == NO) { p = i; } else { p = *(msa_seq_infop->lead_columns_start + i); } /* add T[k][l] contribution for the msa-sequence */ add_Tkl_contribution_msa_multi(hmmp, msa_seq_infop, forw_mtx, backw_mtx, forw_scale, p, k, lp, T_lab, use_gap_shares, use_lead_columns, i, YES, scoring_method, normalize, multi_scoring_method, aa_freqs, aa_freqs_2, aa_freqs_3, aa_freqs_4); i++; if(use_lead_columns == NO) { if(i >= seq_len) { break; } } else { if(*(msa_seq_infop->lead_columns_start + i) == END) { break; } } } /* move on to next path */ while(lp->next != NULL) { lp++; } lp++; } /* calculate E[k][a] contribution from this sequence */ if(silent_state_multi(k, hmmp) != 0) { i = 0; while(1) { /* get correct incex for this letter column */ if(use_lead_columns == NO) { p = i; } else { p = *(msa_seq_infop->lead_columns_start + i); } /* get basic scoring result */ Eka_base = add_Eka_contribution_msa_multi(hmmp, msa_seq_infop, forw_mtx, backw_mtx, p, k, i, use_lead_columns); /* loop over the alphabets */ for(alphabet_nr = 1; alphabet_nr <= hmmp->nr_alphabets; alphabet_nr++) { if(alphabet_nr == 1) { alphabet = hmmp->alphabet; subst_mtx = hmmp->subst_mtx; a_size = hmmp->a_size; E_cur = E_lab; msa_seq = msa_seq_infop->msa_seq_1; } else if(alphabet_nr == 2) { alphabet = hmmp->alphabet_2; subst_mtx = hmmp->subst_mtx_2; a_size = hmmp->a_size_2; E_cur = E_lab_2; msa_seq = msa_seq_infop->msa_seq_2; } else if(alphabet_nr == 3) { alphabet = hmmp->alphabet_3; subst_mtx = hmmp->subst_mtx_3; a_size = hmmp->a_size_3; E_cur = E_lab_3; msa_seq = msa_seq_infop->msa_seq_3; } else if(alphabet_nr == 4) { alphabet = hmmp->alphabet_4; subst_mtx = hmmp->subst_mtx_4; a_size = hmmp->a_size_4; E_cur = E_lab_4; msa_seq = msa_seq_infop->msa_seq_4; } /* get result and add to matrix according to scoring method */ add_to_E_multi(E_cur, Eka_base, msa_seq, p, k, a_size, normalize, subst_mtx, alphabet, scoring_method, use_nr_occ, DISCRETE, NULL); } /* update loop index, check if we are done */ i++; if(use_lead_columns == NO) { if(i >= seq_len) { break; } } else { if(*(msa_seq_infop->lead_columns_start + i) == END) { break; } } } } } #ifdef DEBUG_BW dump_T_matrix(hmmp->nr_v, hmmp->nr_v, T_lab); dump_E_matrix(hmmp->nr_v, hmmp->a_size, E_lab);#endif /* some garbage collection */ free(forw_mtx); free(backw_mtx); free(forw_scale); msa_seq_infop++; } if(verbose == YES) { printf("log likelihood diff rd %d: %f\n", iteration, new_log_likelihood_ulab
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -