📄 core_algorithms_multialpha.c
字号:
t_res2 = t_res2 + *((hmmp->log_transitions) + get_mtx_index(wp->vertex, hmmp->nr_v - 1, hmmp->nr_v)); #ifdef DEBUG_VI printf("prev = %f: ", t_res1); printf("trans = %f\n", t_res2);#endif if(t_res1 != DEFAULT && t_res2 != DEFAULT) { res = t_res1 + t_res2; } else { res = DEFAULT; } if(prev == NO_PREV && res != DEFAULT) { prev = from_vp->vertex; prevp = from_vp; max = res; continue; } if(res > max && res != DEFAULT) { prev = from_vp->vertex; prevp = from_vp; max = res; } wp++; }#ifdef DEBUG_VI printf("res = %f\n", max);#endif if(max != DEFAULT) { (cur_rowp + hmmp->nr_v-1)->prob = max; (cur_rowp + hmmp->nr_v-1)->prev = prev; (cur_rowp + hmmp->nr_v-1)->prevp = prevp; } else {#ifdef DEBUG_VI dump_trans_matrix(hmmp->nr_v, hmmp->nr_v, hmmp->transitions); dump_trans_matrix(hmmp->nr_v, hmmp->nr_v, hmmp->log_transitions);#endif printf("Sequence cannot be produced by this hmm\n"); sequence_as_string(s); return NOPROB; }#ifdef PATH //dump_viterbi_matrix(seq_len + 2, hmmp->nr_v, viterbi_mtxp); //dump_viterbi_path((cur_rowp + w)->prevp); printf("normalized log likelihood for most probable path = %f\n", 0.0 - (((cur_rowp + hmmp->nr_v - 1)->prob) / seq_len)); printf("and most probable path is: "); //dump_viterbi_path((cur_rowp + hmmp->nr_v - 1), hmmp, viterbi_mtxp, seq_len + 1, hmmp->nr_v); printf("%d\n",hmmp->nr_v - 1); printf("log prob = %f\n", (cur_rowp + hmmp->nr_v-1)->prob); printf("real prob = %f\n", pow(10,(cur_rowp + hmmp->nr_v-1)->prob)); dump_viterbi_label_path((cur_rowp + hmmp->nr_v - 1), hmmp, viterbi_mtxp, seq_len + 1, hmmp->nr_v); printf("\n");#endif /* Garbage collection and return */ free(seq); if(hmmp->nr_alphabets > 1) { free(seq_2); } if(hmmp->nr_alphabets > 2) { free(seq_3); } if(hmmp->nr_alphabets > 3) { free(seq_4); } return OK;}/************************* the one_best algorithm **********************************/int one_best_multi(struct hmm_multi_s *hmmp, struct letter_s *s, struct letter_s *s_2, struct letter_s *s_3, struct letter_s *s_4, struct one_best_s **ret_one_best_mtxpp, double **ret_scale_fspp, int use_labels, char *best_labeling, int multi_scoring_method){ struct one_best_s *one_best_mtxp, *cur_rowp, *prev_rowp; /* pointers to forward matrix */ double *scale_fsp; /* pointer to array of scaling factors */ struct letter_s *seq, *seq_2, *seq_3, *seq_4; /* pointer to the sequence */ int *sorted_v_list; /* final list for the association of same-labeling-elements */ int v_list_index; int i,j; /* loop variables */ int seq_len; /* length of the sequence */ int c, v, w; /* looping indices, c loops over the sequence, v and w over the vertices in the HMM */ double row_sum, res, t_res1, t_res2, t_res3; /* temporary variables to calculate probabilities */ double t_res3_1, t_res3_2, t_res3_3, t_res3_4; int nr_v; struct path_element *wp; /* for traversing the paths from v to w */ int a_index, a_index_2, a_index_3, a_index_4; /* holds the current letter's position in the alphabet */ int replacement_letter_c, replacement_letter_c_2, replacement_letter_c_3, replacement_letter_c_4; double scaled_result; /* Allocate memory for matrix and scaling factors + some initial setup: * Note 1: one_best probability matrix has the sequence indices vertically * and the vertex indices horizontally meaning it will be filled row by row * Note 2: *ret_one_best_mtxpp and *ret_scale_fspp are allocated here, but must be * freed by caller */ nr_v = hmmp->nr_v; seq_len = get_seq_length(s); *ret_one_best_mtxpp = (struct one_best_s*)(malloc_or_die((seq_len+2) * hmmp->nr_v * sizeof(struct one_best_s))); one_best_mtxp = *ret_one_best_mtxpp; *ret_scale_fspp = (double*)(malloc_or_die((seq_len+2) * sizeof(double))); scale_fsp = *ret_scale_fspp; sorted_v_list = (int*)(malloc_or_die((hmmp->nr_v * 2 + 1) * sizeof(int))); /* Convert sequence to 1...L for easier indexing */ seq = (struct letter_s*) malloc_or_die(((seq_len + 2) * sizeof(struct letter_s))); memcpy(seq+1, s, seq_len * sizeof(struct letter_s)); if(hmmp->nr_alphabets > 1) { seq_2 = (struct letter_s*) malloc_or_die(((seq_len + 2) * sizeof(struct letter_s))); memcpy(seq_2+1, s_2, seq_len * sizeof(struct letter_s)); } if(hmmp->nr_alphabets > 2) { seq_3 = (struct letter_s*) malloc_or_die(((seq_len + 2) * sizeof(struct letter_s))); memcpy(seq_3+1, s_3, seq_len * sizeof(struct letter_s)); } if(hmmp->nr_alphabets > 3) { seq_4 = (struct letter_s*) malloc_or_die(((seq_len + 2) * sizeof(struct letter_s))); memcpy(seq_4+1, s_4, seq_len * sizeof(struct letter_s)); } /* Initialize the first row of the matrix */ one_best_mtxp->prob = 1.0; /* sets index (0,0) to 1.0, the rest are already 0.0 as they should be */ one_best_mtxp->labeling = (char*)(malloc_or_die(1 * sizeof(char))); for(i = 1; i < hmmp->nr_v; i++) { (one_best_mtxp+i)->labeling = NULL; } *scale_fsp = 1.0; /* create initial sorted v-list*/ *(sorted_v_list) = 0; /* 0 is always the number of the start state */ *(sorted_v_list + 1) = V_LIST_NEXT; *(sorted_v_list + 2) = V_LIST_END; /* Fill in middle rows */ prev_rowp = one_best_mtxp; cur_rowp = one_best_mtxp + hmmp->nr_v; for(c = 1; c <= seq_len; c++) { /* get alphabet index for c*/ if(hmmp->alphabet_type == DISCRETE) { replacement_letter_c = NO; a_index = get_alphabet_index(&seq[c], hmmp->alphabet, hmmp->a_size); if(a_index < 0) { a_index = get_replacement_letter_index_multi(&seq[c], hmmp->replacement_letters, 1); if(a_index < 0) { printf("Letter '%s' is not in alphabet\n", (&seq[c])->letter); return NOPROB; } replacement_letter_c = YES; } } if(hmmp->nr_alphabets > 1) { if(hmmp->alphabet_type_2 == DISCRETE) { replacement_letter_c_2 = NO; a_index_2 = get_alphabet_index(&seq_2[c], hmmp->alphabet_2, hmmp->a_size_2); if(a_index_2 < 0) { a_index_2 = get_replacement_letter_index_multi(&seq_2[c], hmmp->replacement_letters, 2); if(a_index_2 < 0) { printf("Letter '%s' is not in alphabet\n", (&seq_2[c])->letter); return NOPROB; } replacement_letter_c_2 = YES; } } } if(hmmp->nr_alphabets > 2) { if(hmmp->alphabet_type_3 == DISCRETE) { replacement_letter_c_3 = NO; a_index_3 = get_alphabet_index(&seq_3[c], hmmp->alphabet_3, hmmp->a_size_3); if(a_index_3 < 0) { a_index_3 = get_replacement_letter_index_multi(&seq_3[c], hmmp->replacement_letters, 3); if(a_index_3 < 0) { printf("Letter '%s' is not in alphabet\n", (&seq_3[c])->letter); return NOPROB; } replacement_letter_c_3 = YES; } } } if(hmmp->nr_alphabets > 3) { if(hmmp->alphabet_type_4 == DISCRETE) { replacement_letter_c_4 = NO; a_index_4 = get_alphabet_index(&seq_4[c], hmmp->alphabet_4, hmmp->a_size_4); if(a_index_4 < 0) { a_index_4 = get_replacement_letter_index_multi(&seq_4[c], hmmp->replacement_letters, 4); if(a_index_4 < 0) { printf("Letter '%s' is not in alphabet\n", (&seq_4[c])->letter); return NOPROB; } replacement_letter_c_4 = YES; } } } #ifdef DEBUG_FW printf("seq[c] = %s\n", &seq[c]); printf("a_index = %d\n", a_index); printf("v_list dump out "); dump_v_list(sorted_v_list); #endif /* calculate probabilities */ row_sum = 0.0; for(v = 0; v < hmmp->nr_v - 1; v++) /* v = to-vertex */ {#ifdef DEBUG_FW printf("prob to vertex %d\n", v);#endif v_list_index = 0; (cur_rowp + v)->labeling = NULL; while(*(sorted_v_list + v_list_index) != V_LIST_END) { res = 0.0; while(*(sorted_v_list + v_list_index) != V_LIST_NEXT) { w = *(sorted_v_list + v_list_index); /* w = from-vertex */ /* calculate intermediate results */ res += (prev_rowp + w)->prob * *(hmmp->tot_transitions + (w * nr_v + v)); if(*(hmmp->tot_transitions + (w * nr_v + v)) < 0) { printf("Error: found model transition prob from %d to %d < 0.0\n", w, v); exit(0); }#ifdef DEBUG_FW printf("prev = %f: ", (prev_rowp + w)->prob); printf("trans = %f\n", *(hmmp->tot_transitions + (w * nr_v + v)));#endif v_list_index++; } if(res == 0.0) { v_list_index++; continue; } /* calculate prob of producing letter l in v */ t_res3 = get_single_emission_score_multi(hmmp, seq, seq_2, seq_3, seq_4, c, v, replacement_letter_c, replacement_letter_c_2, replacement_letter_c_3, replacement_letter_c_4, use_labels, a_index, a_index_2, a_index_3, a_index_4, multi_scoring_method); res = res * t_res3; /* check if this score is best so far */#ifdef DEBUG_FW printf("best score = %f\n",(cur_rowp + v)->prob);#endif if(res > (cur_rowp + v)->prob) {#ifdef DEBUG_FW printf("updating best score to %f\n", res); #endif /* save result in matrices */ (cur_rowp + v)->prob = res; /* set pointer to point to current labeling */ (cur_rowp + v)->labeling = (prev_rowp + w)->labeling; if((cur_rowp + v)->labeling == NULL) { printf("Error: NULL labeling when updating best score\n"); exit(0); } } #ifdef DEBUG_FW printf("res = %f\n", res);#endif v_list_index++; } } /* update labeling pointers */ update_labelings(cur_rowp, hmmp->vertex_labels, sorted_v_list, seq_len, c, hmmp->labels, hmmp->nr_labels, hmmp->nr_v); deallocate_row_labelings(prev_rowp, hmmp->nr_v); /* scale the results, row_sum = the total probability of * having produced the labelings up to and including character c */ row_sum = 0.0; for(v = 0; v < hmmp->nr_v; v++) { row_sum = row_sum + (cur_rowp + v)->prob;#ifdef DEBUG_FW dump_labeling((cur_rowp + v)->labeling, c); printf("c = %d\n", c);#endif } scale_fsp[c] = row_sum;#ifdef DEBUG_FW printf("rowsum = %f\n",row_sum); printf("scaling set to: %f\n", scale_fsp[c]);#endif if(row_sum == 0.0) { printf("Sequence cannot be produced by this hmm\n"); sequence_as_string(s); exit(0); return NOPROB; } for(v = 0; v < hmmp->nr_v; v++) { if((cur_rowp + v)->prob != 0.0){ (cur_rowp + v)->prob = ((cur_rowp + v)->prob)/row_sum; /* scaling */ } } /* move row pointers one row forward */ prev_rowp = cur_rowp; cur_rowp = cur_rowp + hmmp->nr_v; } /* Fill in transition to end state */ v_list_index = 0; (cur_rowp + hmmp->nr_v - 1)->labeling = NULL; while(*(sorted_v_list + v_list_index) != V_LIST_END) { res = 0.0; while(*(sorted_v_list + v_list_index) != V_LIST_NEXT) { w = *(sorted_v_list + v_list_index); /* w = from-vertex */ t_res1 = (prev_rowp + w)->prob; t_res2 = *((hmmp->tot_transitions) + get_mtx_index(w, hmmp->nr_v-1, hmmp->nr_v)); if(t_res2 > 1.0) { t_res2 = 1.0; } res += t_res1 * t_res2; v_list_index++; } /* check if this score is best so far */ if(res > (cur_rowp + hmmp->nr_v - 1)->prob) { /* save result in matrices */ (cur_rowp + hmmp->nr_v - 1)->prob = res; /* set pointer to point to current labeling */ (cur_rowp + hmmp->nr_v - 1)->labeling = (prev_rowp + w)->labeling; } v_list_index++; }#ifdef DEBUG_FW dump_one_best_matrix(seq_len + 2, hmmp->nr_v, one_best_mtxp); dump_scaling_array(seq_len + 1, scale_fsp);#endif /* store results */ scaled_result = (cur_rowp + hmmp->nr_v - 1)->prob; memcpy(best_labeling, ((cur_rowp + hmmp->nr_v - 1)->labeling) + 1, seq_len * sizeof(char)); best_labeling[seq_len] = '\0';#ifdef DEBUG_PATH printf("seq_len = %d\n", seq_len); printf("best labeling = %s\n", best_labeling);#endif /* Garbage collection and return */ free(seq); if(hmmp->nr_alphabets > 1) { free(seq_2); } if(hmmp->nr_alphabets > 2) { free(seq_3); } if(hmmp->nr_alphabets > 3) { free(seq_4); } free(sorted_v_list); /* FREE labelings, except result labeling */ deallocate_row_labelings(prev_rowp, hmmp->nr_v); return OK;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -