📄 pviterbi_propagate.c

📁 General Hidden Markov Model Library 一个通用的隐马尔科夫模型的C代码库
💻 C
📖 第 1 页 / 共 3 页
字号:
#ifdef DEBUG  if (y > pv->len_y || state > pv->mo->N || y < - pv->mo->max_offset_y)    fprintf(stderr, "get_end_of_first: out of bounds %i %i %i\n", 	    x + pv->mo->max_offset_x, y + pv->mo->max_offset_y, 	    state);#endif  if (y - offset_y >= 0)    return pv->end_of_first[offset_x][y - offset_y + pv->mo->max_offset_y][state];  else    return NULL;}/*============================================================================*//* since we only keep the frontier we have to push back when ever a row is   complete */static void push_back_phi_prop (plocal_propagate_store_t * pv, int length_y){  int off_x, y, j;  /* push back the old phi values */  for (off_x=pv->mo->max_offset_x; off_x>0; off_x--)    for (y=0; y<length_y + pv->mo->max_offset_y + 1; y++)      for (j=0; j<pv->mo->N; j++) {	pv->phi[off_x][y][j] = pv->phi[off_x-1][y][j];	pv->end_of_first[off_x][y][j] = pv->end_of_first[off_x-1][y][j];      }}/*============================================================================*/static void init_start_stop (cell * start, cell *stop, ghmm_dpseq * X,			     ghmm_dpseq * Y, int * start_x, int * start_y,			     int * stop_x, int * stop_y) {  if (start != NULL) {    *start_x = start->x;    *start_y = start->y;  }  else {    *start_x = 0;    *start_y = 0;  }  if (stop != NULL) {    *stop_x = stop->x;    *stop_y = stop->y;  }  else {    *stop_x = X->length;    *stop_y = Y->length;  }}/*============================================================================*/int * ghmm_dpmodel_viterbi_propagate (ghmm_dpmodel *mo, ghmm_dpseq * X, ghmm_dpseq * Y,			  double *log_p, int *path_length, double max_size) {#define CUR_PROC "ghmm_dpmodel_viterbi_propagate"  /* Divide and conquer algorithm to reduce the memory requirement */    /* 1. Compute the alignment of X vs Y and propagate the middle point*/  /* 2. Solve recursively the two alignments X[0:len/2] vs Y[0:m] and      X[len/2+1:len] vs Y[m+1:len] */  /* Break the recursion if the lengths of both sequences become tractable     for the normal ghmm_dpmodel_viterbi algorithm */  /* start of the implementation */  /* give sequence length of X = 0 to ghmm_dpmodel_viterbi alloc so traceback matrix      will not be allocated */  plocal_propagate_store_t * pv = pviterbi_propagate_alloc(mo, Y->length);  /* check if it worked */  if (!pv) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; }  /* Precomputing the log(a_ij) and log(bj(ot)) */  pviterbi_prop_precompute(mo, pv);    /* Launch the recursion */  /* double step_log;     pviterbi_propagate_step(mo, X, Y, NULL, NULL, &step_log,pv);     printf("step log for the whole sequence: %f\n", step_log); */  return pviterbi_propagate_recursion(mo, X, Y, log_p, path_length, 				      NULL, NULL,				      max_size, pv);STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  /* Free the memory space */  pviterbi_propagate_free(&pv, mo->N, mo->max_offset_x, mo->max_offset_y, Y->length);  return NULL;#undef CUR_PROC}/*============================================================================*/static int * pviterbi_propagate_recursion (ghmm_dpmodel *mo, ghmm_dpseq * X,					   ghmm_dpseq * Y, double *log_p,					   int *path_length, cell *start,					   cell *stop, double max_size,					   plocal_propagate_store_t * pv) {#define CUR_PROC "pviterbi_propagate_recursion"  /* Divide and conquer algorithm to reduce the memory requirement */    /* 1. Compute the alignment of X vs Y and propagate the middle point */    /* 2. Solve recursively the two alignments X[0:len/2] vs Y[0:m] and      X[len/2+1:len] vs Y[m+1:len] */  /* Break the recursion if the lengths of both sequences become tractable     for the normal ghmm_dpmodel_viterbi algorithm */  /* start of the implementation */  int * path_seq = NULL;  int start_x, start_y, stop_x, stop_y;  double * original_pi=NULL;  int i;  cell * middle;  int length1, length2;  double log_p1, log_p2;  int * path1, * path2;  init_start_stop(start, stop, X, Y, &start_x, &start_y, &stop_x, &stop_y);#ifdef DEBUG  printf("Recursion: start, stop cells\n");  if(start)    ghmm_dpmodel_print_cell(start);  else    printf("(0, 0) first segment\n");  if(stop)    ghmm_dpmodel_print_cell(stop);  else  printf("(%i, %i) last segment\n", stop_x, stop_y);#endif  /* Break the recursion if the lengths of both sequences become tractable     for the normal ghmm_dpmodel_viterbi algorithm */  if ((double)(stop_x - start_x) * (double)(stop_y - start_y) < max_size) {    /* to use the unchanged ghmm_dpmodel_viterbi algorithm take slices of the sequences */    ghmm_dpseq * tractable_X = ghmm_dpseq_slice(X, start_x, stop_x);    ghmm_dpseq * tractable_Y = ghmm_dpseq_slice(Y, start_y, stop_y);    /* if this is not the very first path segment starting at zero:       temporarily change the initial probabability to go into state k+1 */#ifdef DEBUG    printf("ghmm_dpmodel_viterbi on slice  x[%i:%i], y[%i:%i]\n", 	   start_x, stop_x, start_y, stop_y); #endif    if (start != NULL) {      ARRAY_CALLOC (original_pi, mo->N);      /* save original pi and set all to zero */      for (i=0; i<mo->N; i++) {	original_pi[i] = mo->s[i].log_pi;	mo->s[i].log_pi = 1;      }      /* set the initial prob. for the state that was in the middle of path */      mo->s[start->state].log_pi = start->log_p + start->log_a;            /* compute the partial path */      if (stop != NULL)	path_seq = ghmm_dpmodel_viterbi_variable_tb(mo, tractable_X, tractable_Y, log_p, path_length, stop->previous_state);      else	path_seq = ghmm_dpmodel_viterbi_variable_tb(mo, tractable_X, tractable_Y, log_p, path_length, -1);      /* restore the original model */      for (i=0; i<mo->N; i++) 	mo->s[i].log_pi = original_pi[i];      m_free(original_pi);    }    else {      if (stop != NULL)	path_seq = ghmm_dpmodel_viterbi_variable_tb(mo, tractable_X, tractable_Y, log_p, path_length, stop->previous_state);      else	path_seq = ghmm_dpmodel_viterbi_variable_tb(mo, tractable_X, tractable_Y, log_p, path_length, -1);    }    if (*log_p == 1) {      fprintf(stderr, "Problem with slice x[%i:%i], y[%i:%i]\n", 	      start_x, stop_x, start_y, stop_y);    }    return path_seq;  }  else {    /* 1. Compute the alignment of X vs Y and propagate the middle point */    double step_log_p = 1;    /* if this is not the very first path segment starting at zero:       temporarily change the initial probabability to go into state k+1 */    if (start != NULL) {      ARRAY_CALLOC(original_pi, mo->N);      /* save original pi and set all to zero */      for (i=0; i<mo->N; i++) {	original_pi[i] = mo->s[i].log_pi;	mo->s[i].log_pi = 1;      }      /* set the initial prob. for the state that was in the middle of path */      mo->s[start->state].log_pi =  start->log_p + start->log_a;    }    middle = pviterbi_propagate_step(mo, X, Y, start, stop, &step_log_p, pv);    if (start != NULL) {      /* restore the original model */      for (i=0; i<mo->N; i++) 	mo->s[i].log_pi = original_pi[i];      m_free(original_pi);    }    /* check if there is a middle */    if (!middle) {      fprintf(stderr, "(%i, %i)->(%i, %i) No middle found!\n", 	      start_x, start_y, stop_x, stop_y);      ARRAY_CALLOC(path_seq, 1);      path_seq[0] = -1;      *path_length = 1;      *log_p = 1;      return path_seq;    }#ifdef DEBUG    else {      printf("(%i, %i)->(%i, %i) Middlepoint ", start_x, start_y,	     stop_x, stop_y);      ghmm_dpmodel_print_cell(middle);    } #endif    /* check if there is a path */    if (step_log_p == 1) {      ARRAY_CALLOC(path_seq, 1);      path_seq[0] = -1;      *path_length = 1;      *log_p = 1;      return path_seq;    }    /* 2. Solve recursively the two alignments X[0:len/2] vs Y[0:m] and        X[len/2+1:len] vs Y[m+1:len] */    length1 = 0;    log_p1 = 0;    path1 = pviterbi_propagate_recursion(mo, X, Y, &log_p1, &length1, 					       start, middle,					       max_size, pv);    length2 = 0;    log_p2 = 0;    path2 = pviterbi_propagate_recursion(mo, X, Y, &log_p2, &length2, 					       middle, stop,					       max_size, pv);    /* check the paths */    if (log_p1 == 1 || log_p2 == 1) {      ARRAY_CALLOC (path_seq, 1);      path_seq[0] = -1;      *path_length = 1;      *log_p = 1;      return path_seq;    }      /* concatenate the paths */    *path_length = length1 + length2;    *log_p = log_p2;#ifdef DEBUG    /* check if the transition between the ends of the paths is possible */    for (i=0; i<mo->s[path1[length1-1]].in_states; i++){      if (mo->s[path1[length1-1]].in_id[i] == path2[0])	break;    }    if (i == mo->s[path1[length1-1]].in_states) {      printf("no transition between states %i -> %i\n", path1[length1-1], path2[0]);    }    printf("Conquer: start, stop cells\n");    if(start)      ghmm_dpmodel_print_cell(start);    else      printf("(0, 0) first segment\n");    if(stop)      ghmm_dpmodel_print_cell(stop);    else      printf("(%i, %i) last segment\n", stop_x, stop_y);    printf("Path 1:\n[");    for (i=0; i<length1; i++)      printf("%i,", path1[i]);    printf("\nPath 2:\n[");    for (i=0; i<length2; i++)      printf("%i,", path2[i]);     printf("]\n");#endif	    ARRAY_CALLOC (path_seq, *path_length);    for (i=0; i<length1; i++)      path_seq[i] = path1[i];    for (i=0; i<length2; i++)      path_seq[length1 + i] = path2[i];    return path_seq;  }STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  return NULL;#undef CUR_PROC}/*============================================================================*/static void init_phi_prop (plocal_propagate_store_t * pv, ghmm_dpseq * X,			   ghmm_dpseq * Y, cell * start, cell * stop) {#define CUR_PROC "init_phi_prop"  int u, v, j, i, off_x, y;  double value, max_value, previous_prob, log_b_i, log_in_a_ij ;  int start_x, start_y, stop_x, stop_y, middle_x;  ghmm_dpmodel * mo = pv->mo;  double (*log_in_a)(plocal_propagate_store_t*, int, int, ghmm_dpseq*, 		     ghmm_dpseq*, int, int);  log_in_a = &sget_log_in_a_prop;  init_start_stop(start, stop, X, Y, &start_x, &start_y, &stop_x, &stop_y);  pv->start_x = start_x;  pv->start_y = start_y;  middle_x = start_x + (stop_x - start_x) / 2;  /* to be sure that we do not look up something out of the bounds set the     whole matrix to 1 */  /* Initialize the lookback matrix (for positions [-offsetX,0], [0, len_y]*/  for (off_x=0; off_x<mo->max_offset_x + 1; off_x++)    for (y=0; y<Y->length + mo->max_offset_y + 1; y++)      for (j=0; j<mo->N; j++) {	pv->phi[off_x][y][j] = +1;      }  /* Inititalize the end_of_first matrix */  for (off_x=0; off_x<mo->max_offset_x + 1; off_x++)    for (y=0; y<Y->length + mo->max_offset_y + 1; y++)      for (j=0; j<mo->N; j++) 	if (pv->end_of_first[off_x][y][j]) {	  /* m_free(pv->end_of_first[off_x][y][j]); */	  pv->end_of_first[off_x][y][j] = NULL;	}  if (mo->model_type & GHMM_kSilentStates) { /* could go into silent state at t=0 */    /*p__viterbi_silent( mo, t=0, v);*/  }  /*for (j = 0; j < mo->N; j++)    {      printf("\npsi[%d],in:%d, phi=%f\n", t, v->psi[t][j], v->phi[j]);    }  for( i = 0; i < mo->N; i++){    printf("%d\t", former_matchcount[i]);  }  for (i = 0; i < mo->N; i++){    printf("%d\t", recent_matchcount[i]);  }*/    /* initialize for offsets > 1 (u < max_offset_x, v < max_offset_y) */  /* this is in principle the same as the main recurrence but adds initial     probabilities to states that cannot be inhabitated at u=0, v=0 because     of greater offsets than one */  /* u, v <= max offsets */  for (u = -1; u <= mo->max_offset_x; u++) {    for (v = start_y - mo->max_offset_y; v < stop_y; v++) {      for (j = 0; j < mo->N; j++) 	{	  /** initialization of phi (lookback matrix) **/	  set_phi_prop(pv, u, v, j, 1);	  /** traceback for the propagate algorithm **/	  set_end_of_first(pv, u, v, j, NULL);	}      for (i = 0; i < mo->N; i++) {	/* Determine the maximum */	/* max_phi = phi[i] + log_in_a[j][i] ... */	if (!(mo->model_type & GHMM_kSilentStates) || !mo->silent[i] ) {	  max_value = -DBL_MAX;	  set_end_of_first(pv, u, v, i, NULL);	  for (j = 0; j < mo->s[i].in_states; j++) {	    /* look back in the phi matrix at the offsets */	    previous_prob = get_phi_prop(pv, u, v, mo->s[i].offset_x, 					 mo->s[i].offset_y, mo->s[i].in_id[j]);
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -