📄 core_algorithms_multialpha.c
字号:
/* get alphabet index for c*/ if(hmmp->alphabet_type == DISCRETE) { replacement_letter_c = NO; a_index = get_alphabet_index(&seq[c], hmmp->alphabet, hmmp->a_size); if(a_index < 0) { a_index = get_replacement_letter_index_multi(&seq[c], hmmp->replacement_letters, 1); if(a_index < 0) { printf("Letter '%s' is not in alphabet\n", (&seq[c])->letter); return NOPROB; } replacement_letter_c = YES; } } if(hmmp->nr_alphabets > 1) { if(hmmp->alphabet_type_2 == DISCRETE) { replacement_letter_c_2 = NO; a_index_2 = get_alphabet_index(&seq_2[c], hmmp->alphabet_2, hmmp->a_size_2); if(a_index_2 < 0) { a_index_2 = get_replacement_letter_index_multi(&seq_2[c], hmmp->replacement_letters, 2); if(a_index_2 < 0) { printf("Letter '%s' is not in alphabet\n", (&seq_2[c])->letter); return NOPROB; } replacement_letter_c_2 = YES; } } } if(hmmp->nr_alphabets > 2) { if(hmmp->alphabet_type_3 == DISCRETE) { replacement_letter_c_3 = NO; a_index_3 = get_alphabet_index(&seq_3[c], hmmp->alphabet_3, hmmp->a_size_3); if(a_index_3 < 0) { a_index_3 = get_replacement_letter_index_multi(&seq_3[c], hmmp->replacement_letters, 3); if(a_index_3 < 0) { printf("Letter '%s' is not in alphabet\n", (&seq_3[c])->letter); return NOPROB; } replacement_letter_c_3 = YES; } } } if(hmmp->nr_alphabets > 3) { if(hmmp->alphabet_type_4 == DISCRETE) { replacement_letter_c_4 = NO; a_index_4 = get_alphabet_index(&seq_4[c], hmmp->alphabet_4, hmmp->a_size_4); if(a_index_4 < 0) { a_index_4 = get_replacement_letter_index_multi(&seq_4[c], hmmp->replacement_letters, 4); if(a_index_4 < 0) { printf("Letter '%s' is not in alphabet\n", (&seq_4[c])->letter); return NOPROB; } replacement_letter_c_4 = YES; } } } /* calculate sum of probabilities */ for(v = 0; v < hmmp->nr_v - 1; v++) /* v = from-vertex */{#ifdef DEBUG_BW printf("prob passing through vertex %d:\n", v);#endif res = 0; wp = *(hmmp->tot_to_trans_array + v); while(wp->vertex != END) /* w = to-vertex */ { /* total probability of transiting from v to w on all possible path (via silent states) */ t_res2 = *((hmmp->tot_transitions) + get_mtx_index(v, wp->vertex, hmmp->nr_v)); if(t_res2 < 0) { printf("found model transition prob from %d to %d < 0.0\n", v, wp->vertex); exit(0); } t_res1 = (prev_rowp + wp->vertex)->prob; /* probability of having produced the * sequence after c passing through * vertex w */ /* calculate prob of producing letter l in v */ t_res3 = get_single_emission_score_multi(hmmp, seq, seq_2, seq_3, seq_4, c, wp->vertex, replacement_letter_c, replacement_letter_c_2, replacement_letter_c_3, replacement_letter_c_4, use_labels, a_index, a_index_2, a_index_3, a_index_4, multi_scoring_method); res += t_res1 * t_res2 * t_res3; wp++; }#ifdef DEBUG_BW printf("prev = %f: ", t_res1); printf("trans = %f\n", t_res2);#endif /* save result in matrices */ (cur_rowp + v)->prob = res;#ifdef DEBUG_BW printf("res = %f\n", res);#endif } /* scale the results using the scaling factors produced by forward() */#ifdef DEBUG_BW printf("scaling set to: %f\n", scale_fsp[c]); printf("and c = %d\n", c); dump_scaling_array(seq_len + 1, scale_fsp);#endif for(v = 0; v < hmmp->nr_v; v++) { if((cur_rowp + v)->prob != 0){ if(scale_fsp[c] != 0.0) { (cur_rowp + v)->prob = ((cur_rowp + v)->prob)/scale_fsp[c]; /* scaling */ } else { printf("Sequence cannot be produced by this hmm\n"); sequence_as_string(s); return NOPROB; } } } /* move row pointers one row backward */ prev_rowp = cur_rowp; cur_rowp = cur_rowp - hmmp->nr_v; }#ifdef DEBUG_BW dump_backward_matrix(seq_len + 2, hmmp->nr_v, backw_mtxp); dump_scaling_array(seq_len + 1, scale_fsp);#endif /* Garbage collection and return */ free(seq); if(hmmp->nr_alphabets > 1) { free(seq_2); } if(hmmp->nr_alphabets > 2) { free(seq_3); } if(hmmp->nr_alphabets > 3) { free(seq_4); } return OK;}/************************* the viterbi algorithm **********************************/int viterbi_multi(struct hmm_multi_s *hmmp, struct letter_s *s, struct letter_s *s_2, struct letter_s *s_3, struct letter_s *s_4, struct viterbi_s **ret_viterbi_mtxpp, int use_labels, int multi_scoring_method){ struct viterbi_s *viterbi_mtxp, *cur_rowp; struct viterbi_s *prev_rowp; /* pointers to viterbi matrix */ struct letter_s *seq, *seq_2, *seq_3, *seq_4; /* pointer to the sequence */ int i; /* loop index */ int prev; /* nr of previous vertex in viterbi path */ int seq_len; /* length of the sequence */ int c, v, w; /* looping indices, c loops over the sequence, v and w over the vertices in the HMM */ int a_index, a_index_2, a_index_3, a_index_4; /* holds the current letter's position in the alphabet */ double max, res, t_res1, t_res2, t_res3; /* temporary variables to calculate probabilities */ double t_res3_1, t_res3_2, t_res3_3, t_res3_4; struct path_element *wp, *from_vp, *prevp; /* pointers to elements in to_trans_array and * from_trans_array are used for retrieving the path * from state a to state b */ int replacement_letter_c, replacement_letter_c_2, replacement_letter_c_3, replacement_letter_c_4; int MARKOV_CHAIN; /* set to yes means all emission probs = 1.0, independent of the letter and the state distribution */ MARKOV_CHAIN = NO; /* Allocate memory for matrix + some initial setup: * Note 1: viterbi probability matrix has the vertex indices horizontally * and the sequence indices vertically meaning it will be filled row by row * Note 2: *ret_viterbi_mtxpp is allocated here, but must be * freed by caller * Note 3: The viterbi algorithm uses the to_trans_array to remember the path * taken from a state to another (by letting the prevp point to the path in * the to_trans_array that was taken to reach this state) */ seq_len = get_seq_length(s); *ret_viterbi_mtxpp = (struct viterbi_s*) (malloc_or_die((seq_len+2) * hmmp->nr_v * sizeof(struct viterbi_s))); init_viterbi_s_mtx(*ret_viterbi_mtxpp, DEFAULT, (seq_len+2) * hmmp->nr_v); viterbi_mtxp = *ret_viterbi_mtxpp; /* Convert sequence to 1...L for easier indexing */ seq = (struct letter_s*) malloc_or_die(((seq_len + 2) * sizeof(struct letter_s))); memcpy(seq+1, s, seq_len * sizeof(struct letter_s)); if(hmmp->nr_alphabets > 1) { seq_2 = (struct letter_s*) malloc_or_die(((seq_len + 2) * sizeof(struct letter_s))); memcpy(seq_2+1, s_2, seq_len * sizeof(struct letter_s)); } if(hmmp->nr_alphabets > 2) { seq_3 = (struct letter_s*) malloc_or_die(((seq_len + 2) * sizeof(struct letter_s))); memcpy(seq_3+1, s_3, seq_len * sizeof(struct letter_s)); } if(hmmp->nr_alphabets > 3) { seq_4 = (struct letter_s*) malloc_or_die(((seq_len + 2) * sizeof(struct letter_s))); memcpy(seq_4+1, s_4, seq_len * sizeof(struct letter_s)); } /* Initialize the first row of the matrix */ viterbi_mtxp->prob = 0.0; /* sets index (0,0) to 0.0 (i.e. prob 1.0), the rest are already DEFAULT as they should be */ /* Fill in middle rows */ prev_rowp = viterbi_mtxp; cur_rowp = viterbi_mtxp + hmmp->nr_v; for(c = 1; c <= seq_len; c++) { /* get alphabet index for c*/ if(hmmp->alphabet_type == DISCRETE) { replacement_letter_c = NO; a_index = get_alphabet_index(&seq[c], hmmp->alphabet, hmmp->a_size); if(a_index < 0) { a_index = get_replacement_letter_index_multi(&seq[c], hmmp->replacement_letters, 1); if(a_index < 0) { printf("Letter '%s' is not in alphabet\n", (&seq[c])->letter); return NOPROB; } replacement_letter_c = YES; } } if(hmmp->nr_alphabets > 1) { if(hmmp->alphabet_type_2 == DISCRETE) { replacement_letter_c_2 = NO; a_index_2 = get_alphabet_index(&seq_2[c], hmmp->alphabet_2, hmmp->a_size_2); if(a_index_2 < 0) { a_index_2 = get_replacement_letter_index_multi(&seq_2[c], hmmp->replacement_letters, 2); if(a_index_2 < 0) { printf("Letter '%s' is not in alphabet\n", (&seq_2[c])->letter); return NOPROB; } replacement_letter_c_2 = YES; } } } if(hmmp->nr_alphabets > 2) { if(hmmp->alphabet_type_3 == DISCRETE) { replacement_letter_c_3 = NO; a_index_3 = get_alphabet_index(&seq_3[c], hmmp->alphabet_3, hmmp->a_size_3); if(a_index_3 < 0) { a_index_3 = get_replacement_letter_index_multi(&seq_3[c], hmmp->replacement_letters, 3); if(a_index_3 < 0) { printf("Letter '%s' is not in alphabet\n", (&seq_3[c])->letter); return NOPROB; } replacement_letter_c_3 = YES; } } } if(hmmp->nr_alphabets > 3) { if(hmmp->alphabet_type_4 == DISCRETE) { replacement_letter_c_4 = NO; a_index_4 = get_alphabet_index(&seq_4[c], hmmp->alphabet_4, hmmp->a_size_4); if(a_index_4 < 0) { a_index_4 = get_replacement_letter_index_multi(&seq_4[c], hmmp->replacement_letters, 4); if(a_index_4 < 0) { printf("Letter '%s' is not in alphabet\n", (&seq_4[c])->letter); return NOPROB; } replacement_letter_c_4 = YES; } } } /* calculate sum of probabilities */ for(v = 1; v < hmmp->nr_v - 1; v++) /* v = to-vertex */ {#ifdef DEBUG_VI printf("prob to vertex %d:\n", v);#endif prev = NO_PREV; max = DEFAULT; wp = *(hmmp->tot_from_trans_array + v); res = 0; while((w = wp->vertex) != END) /* w = from-vertex */ { /* calculate intermediate results */ from_vp = wp; t_res1 = (prev_rowp + w)->prob; /* probability of having produced the * sequence up to the last letter ending * in vertex w */ /* calculate intermediate results */ t_res2 = *(hmmp->max_log_transitions + (w * hmmp->nr_v + v)); if(t_res1 != DEFAULT && t_res2 != DEFAULT) { res = t_res1 + t_res2; } else { res = DEFAULT; } if(prev == NO_PREV && res != DEFAULT && (use_labels == NO || seq[c].label == *(hmmp->vertex_labels + v) || seq[c].label == '.')) { max = res; prev = from_vp->vertex; prevp = from_vp; continue; }#ifdef DEBUG_VI printf("seqlabel = %c, vertexlable = %c\n", seq[c].label,*(hmmp->vertex_labels + v)); #endif if(res > max && res != DEFAULT && (use_labels == NO || seq[c].label == *(hmmp->vertex_labels + v) || seq[c].label == '.')) { max = res; prev = from_vp->vertex; prevp = from_vp; } wp++;#ifdef DEBUG_VI printf("prev in pos(%d) = %f: ",from_vp->vertex, t_res1); printf("trans from %d to %d = %f\n", from_vp->vertex, v, t_res2);#endif } #ifdef DEBUG_VI printf("max before setting: %f\n", max);#endif /* add the logprob of reaching state v to the logprob of producing letter l in v*/ if(MARKOV_CHAIN == YES) { if((*((hmmp->log_emissions) + (v * (hmmp->a_size)) + a_index)) != DEFAULT && max != DEFAULT) { max = max + 0.0; } else { max = DEFAULT; } } else { /* calculate prob of producing letter l in v */ t_res3 = get_single_emission_score_multi(hmmp, seq, seq_2, seq_3, seq_4, c, v, replacement_letter_c, replacement_letter_c_2, replacement_letter_c_3, replacement_letter_c_4, use_labels, a_index, a_index_2, a_index_3, a_index_4, multi_scoring_method); } if(t_res3 == 0.0) { max = DEFAULT; } else { t_res3 = log10(t_res3); if(max != DEFAULT) { max = max + t_res3; } } /* save result in matrices */ (cur_rowp + v)->prob = max; (cur_rowp + v)->prev = prev; (cur_rowp + v)->prevp = prevp;#ifdef DEBUG_VI printf("max after setting: %f\n", max);#endif } /* move row pointers one row forward */ prev_rowp = cur_rowp; cur_rowp = cur_rowp + hmmp->nr_v; } /* Fill in transition to end state */#ifdef DEBUG_VI printf("\ntransition to end state:\n");#endif max = DEFAULT; prev = NO_PREV; prevp = NULL; wp = *(hmmp->from_trans_array + hmmp->nr_v-1); while(wp->vertex != END) /* w = from-vertex */ { from_vp = wp; t_res1 = (prev_rowp + wp->vertex)->prob; /* probability of having produced the * sequence up to the last letter ending * in vertex w */ t_res2 = 0.0; while(wp->next != NULL) { t_res2 = t_res2 + *((hmmp->log_transitions) + get_mtx_index(wp->vertex, (wp+1)->vertex, hmmp->nr_v)); wp++; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -