📄 pviterbi_propagate.c
字号:
#ifdef DEBUG if (y > pv->len_y || state > pv->mo->N || y < - pv->mo->max_offset_y) fprintf(stderr, "get_end_of_first: out of bounds %i %i %i\n", x + pv->mo->max_offset_x, y + pv->mo->max_offset_y, state);#endif if (y - offset_y >= 0) return pv->end_of_first[offset_x][y - offset_y + pv->mo->max_offset_y][state]; else return NULL;}/*============================================================================*//* since we only keep the frontier we have to push back when ever a row is complete */static void push_back_phi_prop (plocal_propagate_store_t * pv, int length_y){ int off_x, y, j; /* push back the old phi values */ for (off_x=pv->mo->max_offset_x; off_x>0; off_x--) for (y=0; y<length_y + pv->mo->max_offset_y + 1; y++) for (j=0; j<pv->mo->N; j++) { pv->phi[off_x][y][j] = pv->phi[off_x-1][y][j]; pv->end_of_first[off_x][y][j] = pv->end_of_first[off_x-1][y][j]; }}/*============================================================================*/static void init_start_stop (cell * start, cell *stop, ghmm_dpseq * X, ghmm_dpseq * Y, int * start_x, int * start_y, int * stop_x, int * stop_y) { if (start != NULL) { *start_x = start->x; *start_y = start->y; } else { *start_x = 0; *start_y = 0; } if (stop != NULL) { *stop_x = stop->x; *stop_y = stop->y; } else { *stop_x = X->length; *stop_y = Y->length; }}/*============================================================================*/int * ghmm_dpmodel_viterbi_propagate (ghmm_dpmodel *mo, ghmm_dpseq * X, ghmm_dpseq * Y, double *log_p, int *path_length, double max_size) {#define CUR_PROC "ghmm_dpmodel_viterbi_propagate" /* Divide and conquer algorithm to reduce the memory requirement */ /* 1. Compute the alignment of X vs Y and propagate the middle point*/ /* 2. Solve recursively the two alignments X[0:len/2] vs Y[0:m] and X[len/2+1:len] vs Y[m+1:len] */ /* Break the recursion if the lengths of both sequences become tractable for the normal ghmm_dpmodel_viterbi algorithm */ /* start of the implementation */ /* give sequence length of X = 0 to ghmm_dpmodel_viterbi alloc so traceback matrix will not be allocated */ plocal_propagate_store_t * pv = pviterbi_propagate_alloc(mo, Y->length); /* check if it worked */ if (!pv) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } /* Precomputing the log(a_ij) and log(bj(ot)) */ pviterbi_prop_precompute(mo, pv); /* Launch the recursion */ /* double step_log; pviterbi_propagate_step(mo, X, Y, NULL, NULL, &step_log,pv); printf("step log for the whole sequence: %f\n", step_log); */ return pviterbi_propagate_recursion(mo, X, Y, log_p, path_length, NULL, NULL, max_size, pv);STOP: /* Label STOP from ARRAY_[CM]ALLOC */ /* Free the memory space */ pviterbi_propagate_free(&pv, mo->N, mo->max_offset_x, mo->max_offset_y, Y->length); return NULL;#undef CUR_PROC}/*============================================================================*/static int * pviterbi_propagate_recursion (ghmm_dpmodel *mo, ghmm_dpseq * X, ghmm_dpseq * Y, double *log_p, int *path_length, cell *start, cell *stop, double max_size, plocal_propagate_store_t * pv) {#define CUR_PROC "pviterbi_propagate_recursion" /* Divide and conquer algorithm to reduce the memory requirement */ /* 1. Compute the alignment of X vs Y and propagate the middle point */ /* 2. Solve recursively the two alignments X[0:len/2] vs Y[0:m] and X[len/2+1:len] vs Y[m+1:len] */ /* Break the recursion if the lengths of both sequences become tractable for the normal ghmm_dpmodel_viterbi algorithm */ /* start of the implementation */ int * path_seq = NULL; int start_x, start_y, stop_x, stop_y; double * original_pi=NULL; int i; cell * middle; int length1, length2; double log_p1, log_p2; int * path1, * path2; init_start_stop(start, stop, X, Y, &start_x, &start_y, &stop_x, &stop_y);#ifdef DEBUG printf("Recursion: start, stop cells\n"); if(start) ghmm_dpmodel_print_cell(start); else printf("(0, 0) first segment\n"); if(stop) ghmm_dpmodel_print_cell(stop); else printf("(%i, %i) last segment\n", stop_x, stop_y);#endif /* Break the recursion if the lengths of both sequences become tractable for the normal ghmm_dpmodel_viterbi algorithm */ if ((double)(stop_x - start_x) * (double)(stop_y - start_y) < max_size) { /* to use the unchanged ghmm_dpmodel_viterbi algorithm take slices of the sequences */ ghmm_dpseq * tractable_X = ghmm_dpseq_slice(X, start_x, stop_x); ghmm_dpseq * tractable_Y = ghmm_dpseq_slice(Y, start_y, stop_y); /* if this is not the very first path segment starting at zero: temporarily change the initial probabability to go into state k+1 */#ifdef DEBUG printf("ghmm_dpmodel_viterbi on slice x[%i:%i], y[%i:%i]\n", start_x, stop_x, start_y, stop_y); #endif if (start != NULL) { ARRAY_CALLOC (original_pi, mo->N); /* save original pi and set all to zero */ for (i=0; i<mo->N; i++) { original_pi[i] = mo->s[i].log_pi; mo->s[i].log_pi = 1; } /* set the initial prob. for the state that was in the middle of path */ mo->s[start->state].log_pi = start->log_p + start->log_a; /* compute the partial path */ if (stop != NULL) path_seq = ghmm_dpmodel_viterbi_variable_tb(mo, tractable_X, tractable_Y, log_p, path_length, stop->previous_state); else path_seq = ghmm_dpmodel_viterbi_variable_tb(mo, tractable_X, tractable_Y, log_p, path_length, -1); /* restore the original model */ for (i=0; i<mo->N; i++) mo->s[i].log_pi = original_pi[i]; m_free(original_pi); } else { if (stop != NULL) path_seq = ghmm_dpmodel_viterbi_variable_tb(mo, tractable_X, tractable_Y, log_p, path_length, stop->previous_state); else path_seq = ghmm_dpmodel_viterbi_variable_tb(mo, tractable_X, tractable_Y, log_p, path_length, -1); } if (*log_p == 1) { fprintf(stderr, "Problem with slice x[%i:%i], y[%i:%i]\n", start_x, stop_x, start_y, stop_y); } return path_seq; } else { /* 1. Compute the alignment of X vs Y and propagate the middle point */ double step_log_p = 1; /* if this is not the very first path segment starting at zero: temporarily change the initial probabability to go into state k+1 */ if (start != NULL) { ARRAY_CALLOC(original_pi, mo->N); /* save original pi and set all to zero */ for (i=0; i<mo->N; i++) { original_pi[i] = mo->s[i].log_pi; mo->s[i].log_pi = 1; } /* set the initial prob. for the state that was in the middle of path */ mo->s[start->state].log_pi = start->log_p + start->log_a; } middle = pviterbi_propagate_step(mo, X, Y, start, stop, &step_log_p, pv); if (start != NULL) { /* restore the original model */ for (i=0; i<mo->N; i++) mo->s[i].log_pi = original_pi[i]; m_free(original_pi); } /* check if there is a middle */ if (!middle) { fprintf(stderr, "(%i, %i)->(%i, %i) No middle found!\n", start_x, start_y, stop_x, stop_y); ARRAY_CALLOC(path_seq, 1); path_seq[0] = -1; *path_length = 1; *log_p = 1; return path_seq; }#ifdef DEBUG else { printf("(%i, %i)->(%i, %i) Middlepoint ", start_x, start_y, stop_x, stop_y); ghmm_dpmodel_print_cell(middle); } #endif /* check if there is a path */ if (step_log_p == 1) { ARRAY_CALLOC(path_seq, 1); path_seq[0] = -1; *path_length = 1; *log_p = 1; return path_seq; } /* 2. Solve recursively the two alignments X[0:len/2] vs Y[0:m] and X[len/2+1:len] vs Y[m+1:len] */ length1 = 0; log_p1 = 0; path1 = pviterbi_propagate_recursion(mo, X, Y, &log_p1, &length1, start, middle, max_size, pv); length2 = 0; log_p2 = 0; path2 = pviterbi_propagate_recursion(mo, X, Y, &log_p2, &length2, middle, stop, max_size, pv); /* check the paths */ if (log_p1 == 1 || log_p2 == 1) { ARRAY_CALLOC (path_seq, 1); path_seq[0] = -1; *path_length = 1; *log_p = 1; return path_seq; } /* concatenate the paths */ *path_length = length1 + length2; *log_p = log_p2;#ifdef DEBUG /* check if the transition between the ends of the paths is possible */ for (i=0; i<mo->s[path1[length1-1]].in_states; i++){ if (mo->s[path1[length1-1]].in_id[i] == path2[0]) break; } if (i == mo->s[path1[length1-1]].in_states) { printf("no transition between states %i -> %i\n", path1[length1-1], path2[0]); } printf("Conquer: start, stop cells\n"); if(start) ghmm_dpmodel_print_cell(start); else printf("(0, 0) first segment\n"); if(stop) ghmm_dpmodel_print_cell(stop); else printf("(%i, %i) last segment\n", stop_x, stop_y); printf("Path 1:\n["); for (i=0; i<length1; i++) printf("%i,", path1[i]); printf("\nPath 2:\n["); for (i=0; i<length2; i++) printf("%i,", path2[i]); printf("]\n");#endif ARRAY_CALLOC (path_seq, *path_length); for (i=0; i<length1; i++) path_seq[i] = path1[i]; for (i=0; i<length2; i++) path_seq[length1 + i] = path2[i]; return path_seq; }STOP: /* Label STOP from ARRAY_[CM]ALLOC */ return NULL;#undef CUR_PROC}/*============================================================================*/static void init_phi_prop (plocal_propagate_store_t * pv, ghmm_dpseq * X, ghmm_dpseq * Y, cell * start, cell * stop) {#define CUR_PROC "init_phi_prop" int u, v, j, i, off_x, y; double value, max_value, previous_prob, log_b_i, log_in_a_ij ; int start_x, start_y, stop_x, stop_y, middle_x; ghmm_dpmodel * mo = pv->mo; double (*log_in_a)(plocal_propagate_store_t*, int, int, ghmm_dpseq*, ghmm_dpseq*, int, int); log_in_a = &sget_log_in_a_prop; init_start_stop(start, stop, X, Y, &start_x, &start_y, &stop_x, &stop_y); pv->start_x = start_x; pv->start_y = start_y; middle_x = start_x + (stop_x - start_x) / 2; /* to be sure that we do not look up something out of the bounds set the whole matrix to 1 */ /* Initialize the lookback matrix (for positions [-offsetX,0], [0, len_y]*/ for (off_x=0; off_x<mo->max_offset_x + 1; off_x++) for (y=0; y<Y->length + mo->max_offset_y + 1; y++) for (j=0; j<mo->N; j++) { pv->phi[off_x][y][j] = +1; } /* Inititalize the end_of_first matrix */ for (off_x=0; off_x<mo->max_offset_x + 1; off_x++) for (y=0; y<Y->length + mo->max_offset_y + 1; y++) for (j=0; j<mo->N; j++) if (pv->end_of_first[off_x][y][j]) { /* m_free(pv->end_of_first[off_x][y][j]); */ pv->end_of_first[off_x][y][j] = NULL; } if (mo->model_type & GHMM_kSilentStates) { /* could go into silent state at t=0 */ /*p__viterbi_silent( mo, t=0, v);*/ } /*for (j = 0; j < mo->N; j++) { printf("\npsi[%d],in:%d, phi=%f\n", t, v->psi[t][j], v->phi[j]); } for( i = 0; i < mo->N; i++){ printf("%d\t", former_matchcount[i]); } for (i = 0; i < mo->N; i++){ printf("%d\t", recent_matchcount[i]); }*/ /* initialize for offsets > 1 (u < max_offset_x, v < max_offset_y) */ /* this is in principle the same as the main recurrence but adds initial probabilities to states that cannot be inhabitated at u=0, v=0 because of greater offsets than one */ /* u, v <= max offsets */ for (u = -1; u <= mo->max_offset_x; u++) { for (v = start_y - mo->max_offset_y; v < stop_y; v++) { for (j = 0; j < mo->N; j++) { /** initialization of phi (lookback matrix) **/ set_phi_prop(pv, u, v, j, 1); /** traceback for the propagate algorithm **/ set_end_of_first(pv, u, v, j, NULL); } for (i = 0; i < mo->N; i++) { /* Determine the maximum */ /* max_phi = phi[i] + log_in_a[j][i] ... */ if (!(mo->model_type & GHMM_kSilentStates) || !mo->silent[i] ) { max_value = -DBL_MAX; set_end_of_first(pv, u, v, i, NULL); for (j = 0; j < mo->s[i].in_states; j++) { /* look back in the phi matrix at the offsets */ previous_prob = get_phi_prop(pv, u, v, mo->s[i].offset_x, mo->s[i].offset_y, mo->s[i].in_id[j]);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -