📄 pviterbi_propagate.c
字号:
log_in_a_ij = (*log_in_a)(pv, i, j, X, Y, u, v); if ( previous_prob != +1 && log_in_a_ij != +1) { value = previous_prob + log_in_a_ij; if (value > max_value) { max_value = value; /* Critical point for the propagate algorithm if we are at the middle point of sequence X store this at the end point of the first alignment */ if (u - middle_x < mo->s[i].offset_x && u - middle_x >= 0) { cell * end_of_first = init_cell(u - (mo->s[i].offset_x - 1), v - (mo->s[i].offset_y - 1), i, mo->s[i].in_id[j], previous_prob, log_in_a_ij); if (get_end_of_first(pv, u, v, 0, 0, i) != NULL) { cell * old = get_end_of_first(pv, u, v, 0, 0, i); m_free(old); } set_end_of_first(pv, u, v, i, end_of_first); } else { /* at all other points simply propagate the values on */ set_end_of_first(pv, u, v, i, get_end_of_first(pv, u, v, mo->s[i].offset_x, mo->s[i].offset_y, mo->s[i].in_id[j])); } } } else {;} /* fprintf(stderr, " %d --> %d = %f, \n", i,i,v->log_in_a[i][i]); */ }#ifdef DEBUG int emission = ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u + start_x), ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), mo->size_of_alphabet[mo->s[i].alphabet], mo->s[i].offset_x, mo->s[i].offset_y); if (emission > ghmm_dpmodel_emission_table_size(mo, i)){ printf("State %i\n", i); ghmm_dpmodel_state_print(&(mo->s[i])); printf("charX: %i charY: %i alphabet size: %i emission table: %i emission index: %i\n", ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), mo->size_of_alphabet[mo->s[i].alphabet], ghmm_dpmodel_emission_table_size(mo, i), emission); }#endif log_b_i = log_b_prop(pv, i, ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u + start_x), ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), mo->size_of_alphabet[mo->s[i].alphabet], mo->s[i].offset_x, mo->s[i].offset_y)); /* this is the difference from the main loop: check whether this state could be an initial state and add the initial probability */ if (log_b_i == +1 ) { set_phi_prop(pv, u, v, i, +1); } else { if (max_value == -DBL_MAX) set_phi_prop(pv, u, v, i, +1); else set_phi_prop(pv, u, v, i, max_value); /* if (mo->s[i].pi != 0 && mo->s[i].offset_x - 1 == u && mo->s[i].offset_y - 1 + start_y == v) { */ if (mo->s[i].log_pi != 1 && mo->s[i].offset_x - 1 == u && mo->s[i].offset_y - 1 + start_y == v){ set_phi_prop(pv, u, v, i, mo->s[i].log_pi);#ifdef DEBUG printf("Initial log prob state %i at (%i, %i) = %f\n", i, start_x + u, v, get_phi_prop(pv, u, v, 0, 0, i)); printf("Characters emitted X: %i, Y: %i\n", ghmm_dpseq_get_char(X, mo->s[i].alphabet, u + start_x), ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v));#endif } if (get_phi_prop(pv, u, v, 0, 0, i) != 1) { set_phi_prop(pv, u, v, i, get_phi_prop(pv, u, v, 0, 0, i) + log_b_i); } } } } /* complete time step for emitting states */ /* last_osc = osc; */ /* save last transition class */ /*if (mo->model_type & kSilentStates) { p__viterbi_silent( mo, t, v ); }*/ /* complete time step for silent states */ /************** for (j = 0; j < mo->N; j++) { printf("\npsi[%d],in:%d, phi=%f\n", t, v->psi[t][j], v->phi[j]); } for (i = 0; i < mo->N; i++){ printf("%d\t", former_matchcount[i]); } for (i = 0; i < mo->N; i++){ printf("%d\t", recent_matchcount[i]); } ****************/ } /* End for v in Y */ /* Next character in X */ /* push back the old phi values */ push_back_phi_prop(pv, Y->length);#ifdef DEBUG if (start != NULL && mo->s[start->state].offset_y == 0) { max_value = -DBL_MAX; i = -1; y = -1; off_x = -1; int x; for (x = 0; x<=mo->max_offset_x; x++) for (v = - mo->max_offset_y; v<Y->length; v++) { for (j=0; j<mo->N; j++) { if (get_phi_prop(pv, x, v, x, 0, j) >= max_value && get_phi_prop(pv, x, v, x, 0, j) < 1 - PROP_EPS) { max_value = get_phi_prop(pv, x, v, x, 0, j); i = j; off_x = x; y = v; } } } printf("u = %i start_x = %i off_x = %i ", u, start_x, off_x); printf("max log prob state %i at (%i, %i) = %f after pushback\n", i, start_x + u - (off_x - 1), y, get_phi_prop(pv, u, y, off_x, 0, i)); }#endif } /* End for u in X */#undef CUR_PROC}/*============================================================================*/static cell * pviterbi_propagate_step (ghmm_dpmodel *mo, ghmm_dpseq * X, ghmm_dpseq * Y, cell * start, cell * stop, double * log_p, plocal_propagate_store_t * pv) {#define CUR_PROC "pviterbi_step" /* printf("---- propagate step -----\n"); */ int u, v, j, i; double value, max_value, previous_prob; /* int len_path = mo->N*len; the length of the path is not known apriori */ int start_x, start_y, stop_x, stop_y; double log_b_i, log_in_a_ij; cell * middle = NULL; int middle_x; double (*log_in_a)(plocal_propagate_store_t*, int, int, ghmm_dpseq*, ghmm_dpseq*, int, int); log_in_a = &sget_log_in_a_prop; init_start_stop(start, stop, X, Y, &start_x, &start_y, &stop_x, &stop_y); middle_x = start_x + (stop_x - start_x) / 2;/* if (mo->model_type & kSilentStates && *//* mo->silent != NULL && *//* mo->topo_order == NULL) { *//* ghmm_dmodel_topo_order( mo ); *//* } */ init_phi_prop(pv, X, Y, start, stop);#ifdef DEBUG if (start != NULL && mo->s[start->state].offset_y == 0) { for (u = 0; u<=mo->max_offset_x; u++) { printf("row %i of phi\n", u); for (v = start_y - 1; v < stop_y; v++) { printf("phi(0, %i, %i): %f, ", v, start->state, get_phi_prop(pv, 0, v, 0, 0, start->state)); } printf("\n\n"); } }#endif /* u, v > 0 */ /** THIS IS THE MAIN RECURRENCE **/ /* printf("Main loop x from %i to %i and y from %i to %i\n", start_x + mo->max_offset_x + 1, stop_x, start_y, stop_y);*/ for (u = start_x + mo->max_offset_x + 1; u < stop_x; u++) { for (v = start_y - mo->max_offset_y; v < stop_y; v++) { for (j = 0; j < mo->N; j++) { /** initialization of phi (lookback matrix) **/ set_phi_prop(pv, u, v, j, +1); set_end_of_first(pv, 0, v, j, NULL); } for (i = 0; i < mo->N; i++) { /* Determine the maximum */ /* max_phi = phi[i] + log_in_a[j][i] ... */ if (!(mo->model_type & GHMM_kSilentStates) || !mo->silent[i]) { max_value = -DBL_MAX; set_end_of_first(pv, 0, v, i, NULL); for (j = 0; j < mo->s[i].in_states; j++) { /* look back in the phi matrix at the offsets */ previous_prob = get_phi_prop(pv, u, v, mo->s[i].offset_x, mo->s[i].offset_y, mo->s[i].in_id[j]); log_in_a_ij = (*log_in_a)(pv, i, j, X, Y, u, v); if ( previous_prob != +1 && log_in_a_ij != +1) { value = previous_prob + log_in_a_ij; if (value > max_value) { max_value = value; /* Critical point for the propagate algorithm if we are at the middle point of sequence X store this at the end point of the first alignment */ if (u - middle_x < mo->s[i].offset_x && u - middle_x >= 0) { cell * end_of_first = init_cell(u - (mo->s[i].offset_x - 1), v - (mo->s[i].offset_y - 1), i, mo->s[i].in_id[j], previous_prob, log_in_a_ij); if (get_end_of_first(pv, u, v, 0, 0, i) != NULL) { cell * old = get_end_of_first(pv, u, v, 0, 0, i); m_free(old); } set_end_of_first(pv, u, v, i, end_of_first); } else { /* at all other points simply propagate the values on */ set_end_of_first(pv, u, v, i, get_end_of_first(pv, u, v, mo->s[i].offset_x, mo->s[i].offset_y, mo->s[i].in_id[j])); } } } else {;} /* fprintf(stderr, " %d --> %d = %f, \n", i,i,v->log_in_a[i][i]); */ }#ifdef DEBUG int emission = ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), mo->size_of_alphabet[mo->s[i].alphabet], mo->s[i].offset_x, mo->s[i].offset_y); if (emission > ghmm_dpmodel_emission_table_size(mo, i)){ printf("State %i\n", i); ghmm_dpmodel_state_print(&(mo->s[i])); printf("charX: %i charY: %i alphabet size: %i emission table: %i emission index: %i\n", ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), mo->size_of_alphabet[mo->s[i].alphabet], ghmm_dpmodel_emission_table_size(mo, i), emission); }#endif log_b_i = log_b_prop(pv, i, ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), mo->size_of_alphabet[mo->s[i].alphabet], mo->s[i].offset_x, mo->s[i].offset_y)); /* No maximum found (that is, state never reached) or the output O[t] = 0.0: */ if (max_value == -DBL_MAX ||/* and then also: (v->psi[t][j] == -1) */ log_b_i == +1 ) { set_phi_prop(pv, u, v, i, 1); } else set_phi_prop(pv, u, v, i, max_value + log_b_i); } } /* complete time step for emitting states */ /* last_osc = osc; */ /* save last transition class */ /*if (mo->model_type & kSilentStates) { p__viterbi_silent( mo, t, v ); }*/ /* complete time step for silent states */ /************** for (j = 0; j < mo->N; j++) { printf("\npsi[%d],in:%d, phi=%f\n", t, v->psi[t][j], v->phi[j]); } for (i = 0; i < mo->N; i++){ printf("%d\t", former_matchcount[i]); } for (i = 0; i < mo->N; i++){ printf("%d\t", recent_matchcount[i]); } ****************/ } /* End for v in Y */ /* Next character in X */ /* push back the old phi values */ push_back_phi_prop(pv, Y->length); } /* End for u in X */ /* Termination */ max_value = -DBL_MAX; /* for the last segment search for the maximum probability at the end of the two sequences */ if (stop == NULL){ for (j = 0; j < mo->N; j++){#ifdef DEBUG /* printf("phi(len_x)(len_y)(%i)=%f\n", j, pv->phi[0][stop_y-1][j]); ghmm_dpmodel_print_cell(pv->end_of_first[0][stop_y - 1][j]); */#endif if ( get_phi_prop(pv, stop_x - 1, stop_y - 1, 0, 0, j) != +1 && get_phi_prop(pv, stop_x - 1, stop_y - 1, 0, 0, j) > max_value) { max_value = get_phi_prop(pv, stop_x - 1, stop_y - 1, 0, 0, j); middle = get_end_of_first(pv, stop_x - 1, stop_y - 1, 0, 0, j); } } } /* traceback for the interior segments have to start with the previous state of the middle beacuse the path has to be connected */ else { if ( get_phi_prop(pv, stop_x - 1, stop_y - 1, 0, 0, stop->previous_state) != +1 ) { max_value = get_phi_prop(pv, stop_x - 1, stop_y - 1, 0, 0, stop->previous_state); middle = get_end_of_first(pv, stop_x - 1, stop_y - 1, 0, 0, stop->previous_state); } } if (max_value == -DBL_MAX) { /* Sequence can't be generated from the model! */ *log_p = +1; } else { *log_p = max_value; } return middle;#undef CUR_PROC}/*===========================================================================*/int * ghmm_dpmodel_viterbi_propagate_segment (ghmm_dpmodel *mo, ghmm_dpseq * X, ghmm_dpseq * Y, double *log_p, int *path_length, double max_size, int start_x, int start_y, int stop_x, int stop_y, int start_state, int stop_state, double start_log_p, double stop_log_p) {#define CUR_PROC "ghmm_dpmodel_viterbi_propagate_segment" int * path_seq = NULL; cell * start, * stop; plocal_propagate_store_t * pv = pviterbi_propagate_alloc(mo, Y->length); /* Precomputing the log(a_ij) and log(bj(ot)) */ pviterbi_prop_precompute(mo, pv); /* init start and stop */ start = init_cell (start_x, start_y, start_state, -1, start_log_p, 0); stop = init_cell (stop_x, stop_y, stop_state, stop_state, stop_log_p, 0); /* Launch the recursion */ path_seq = pviterbi_propagate_recursion (mo, X, Y, log_p, path_length, start, stop, max_size, pv); pviterbi_propagate_free (&pv, mo->N, mo->max_offset_x, mo->max_offset_y, Y->length); return path_seq;#undef CUR_PROC}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -