📄 reestimate.c

📁 General Hidden Markov Model Library 一个通用的隐马尔科夫模型的C代码库
💻 C
📖 第 1 页 / 共 3 页
字号:
      size = 1;    /* If all in_a's are zero, the state can't be reached.       Set all b's to -1.0 */    if (!reachable) {      for (hist = 0; hist < size; hist++) {        col = hist * mo->M;        for (m = col; m < col + mo->M; m++) {          mo->s[i].b[m] = -1.0;        }      }    }    else {      for (hist = 0; hist < size; hist++) {        /* If the denominator is very small, we have not seen many emissions           in this state with this history.           We are conservative and just skip them. */        if (r->b_denom[i][hist] < GHMM_EPS_PREC)          continue;        else          factor = (1.0 / r->b_denom[i][hist]);        positive = 0;        /* TEST: denom. < numerator */        col = hist * mo->M;        for (m = col; m < col + mo->M; m++) {          if ((r->b_denom[i][hist] - r->b_num[i][m]) <= -GHMM_EPS_PREC) {            str = ighmm_mprintf (NULL, 0, "numerator b (%.4f) > denom (%.4f)!\n",                                 r->b_num[i][m], r->b_denom[i][hist]);            GHMM_LOG(LCONVERTED, str);            m_free (str);          }                    mo->s[i].b[m] = r->b_num[i][m] * factor;          if (mo->s[i].b[m] >= GHMM_EPS_PREC)            positive = 1;        }        if (!positive) {          str = ighmm_mprintf (NULL, 0, "All numerator b[%d][%d-%d] == 0 (denom = %g)!\n",                               i, col, col + mo->M, r->b_denom[i][hist]);          GHMM_LOG(LCONVERTED, str);          m_free (str);        }      }                           /* for each history */    }  }                             /* for (i = 0 .. < mo->N)  */  res = 0;  if (mo->model_type & GHMM_kTiedEmissions)    ghmm_dmodel_update_tie_groups (mo);STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  return (res);# undef CUR_PROC}                               /* reestimate_setlambda *//*----------------------------------------------------------------------------*/static int reestimate_one_step (ghmm_dmodel * mo, local_store_t * r, int seq_number,				int *seq_length, int **O, double *log_p,				double *seq_w){# define CUR_PROC "reestimate_one_step"  int res = -1;  int k, i, j, t, j_id, valid;  int e_index;  char * str;  double **alpha = NULL;  double **beta = NULL;  double *scale = NULL;  int T_k=0;  double gamma;  double log_p_k;  /* first set maxorder to zero if model_type & kHigherOrderEmissions is FALSE      TODO XXX use model->maxorder only     if model_type & kHigherOrderEmissions is TRUE */  if (!(mo->model_type & GHMM_kHigherOrderEmissions))    mo->maxorder = 0;  *log_p = 0.0;  valid = 0;  /* loop over all sequences */  for (k = 0; k < seq_number; k++) {    mo->emission_history = 0;    T_k = seq_length[k];        /* current seq. length */    /* initialization of  matrices and vector depends on T_k */    if (ighmm_reestimate_alloc_matvek (&alpha, &beta, &scale, T_k, mo->N) == -1) {      GHMM_LOG_QUEUED(LCONVERTED);      goto FREE;    }    if (ghmm_dmodel_forward (mo, O[k], T_k, alpha, scale, &log_p_k) == -1) {      GHMM_LOG_QUEUED(LCONVERTED);      goto FREE;    }    if (log_p_k != +1) {        /* O[k] can be generated */      *log_p += log_p_k;      valid = 1;      if (ghmm_dmodel_backward (mo, O[k], T_k, beta, scale) == -1) {        GHMM_LOG_QUEUED(LCONVERTED);        goto FREE;      }      /* loop over all states */      for (i = 0; i < mo->N; i++) {        /* Pi */        r->pi_num[i] += seq_w[k] * alpha[0][i] * beta[0][i];        r->pi_denom  += seq_w[k] * alpha[0][i] * beta[0][i];        for (t=0; t < T_k-1; t++) {          /* B */          if (!mo->s[i].fix) {            e_index = get_emission_index(mo, i, O[k][t], t);            if (e_index != -1) {              gamma = seq_w[k] * alpha[t][i] * beta[t][i];              r->b_num[i][e_index] += gamma;              r->b_denom[i][e_index / (mo->M)] += gamma;            }          }          update_emission_history(mo, O[k][t]);          /* A */          r->a_denom[i] += (seq_w[k] * alpha[t][i] * beta[t][i]);          for (j=0; j < mo->s[i].out_states; j++) {            j_id = mo->s[i].out_id[j];            e_index = get_emission_index(mo, j_id, O[k][t+1], t+1);            if (e_index != -1)              r->a_num[i][j] += (seq_w[k] * alpha[t][i] * mo->s[i].out_a[j]                                 * mo->s[j_id].b[e_index] * beta[t+1][j_id]                                 * (1.0 / scale[t + 1]));       /* c[t] = 1/scale[t] */          }        }        /* B: last iteration for t==T_k-1 */        t = T_k - 1;        if (!mo->s[i].fix) {          e_index = get_emission_index (mo, i, O[k][t], t);          if (e_index != -1) {            gamma = seq_w[k] * alpha[t][i] * beta[t][i];            r->b_num[i][e_index] += gamma;            r->b_denom[i][e_index / (mo->M)] += gamma;          }        }      }    }                           /* if (log_p_k != +1) */    else {      str = ighmm_mprintf(NULL, 0, "O(%d) can't be built from model mo!\n", k);      GHMM_LOG(LWARN, str);      m_free(str);    }    ighmm_reestimate_free_matvek(alpha, beta, scale, T_k);  }                             /* for (k = 0; k < seq_number; k++) */  if (valid) {    /* new parameter lambda: set directly in model */    if (reestimate_setlambda(r, mo) == -1) {      GHMM_LOG_QUEUED(LCONVERTED);      goto STOP;    }    /* printf ("---- reestimate: after normalization ----\n"); */    /*       printf("Emission:\n");       ghmm_dmodel_B_print(stdout, mo, "\t", " ", "\n");     */    /* only test: */    if (ghmm_dmodel_check(mo) == -1) {      GHMM_LOG_QUEUED(LCONVERTED);      goto STOP;    }  }  else {                        /* NO sequence can be built from model mo! */    *log_p = +1;  }  res = 0;STOP:   return (res);FREE:   ighmm_reestimate_free_matvek(alpha, beta, scale, T_k);   return (res);# undef CUR_PROC}                               /* reestimate_one_step *//*---------------------------------------------------------------------------*/static int reestimate_one_step_lean (ghmm_dmodel * mo, local_store_t * r,                                     int seq_number, int *seq_length,                                     int **seqs, double *log_p, double *seq_w){# define CUR_PROC "reestimate_one_step_lean"  int res = -1;  int i, t, k, e_index, len;  int m, j, j_id, g, g_id, l, s, h;  int size;  int * O;  double c_t;    double * alpha_last_col=NULL;  double * alpha_curr_col=NULL;  double * switching_tmp;  double scale, old_scale, scalingf;  double * summands=NULL;  local_store_t * * curr_est=NULL, * * last_est=NULL, * * switch_lst;  /* allocating memory for two columns of alpha matrix */  ARRAY_CALLOC (alpha_last_col, mo->N);  ARRAY_CALLOC (alpha_curr_col, mo->N);  /* allocating 2*N local_store_t */  ARRAY_CALLOC (last_est, mo->N);  for (i=0; i<mo->N; i++)    last_est[i] = reestimate_alloc(mo);  ARRAY_CALLOC (curr_est, mo->N);  for (i=0; i<mo->N; i++)    curr_est[i] = reestimate_alloc(mo);  /* temporary array to hold logarithmized summands     for sums over probabilities */  ARRAY_CALLOC (summands, m_max(mo->N,ghmm_ipow(mo,mo->M,mo->maxorder+1))+1);  for (k=0; k<seq_number; k++) {    len = seq_length[k];    O = seqs[k];       ghmm_dmodel_forward_init(mo, alpha_last_col, O[0], &scale);    if (scale < GHMM_EPS_PREC) {      /* means: first symbol can't be generated by hmm */      *log_p = +1;      goto STOP;    }    *log_p = - log(1/scale);        for (t=1; t<len; t++) {      old_scale = scale;      scale = 0.0;      update_emission_history(mo, O[t-1]);          /* iterate over non-silent states */      for (i=0; i<mo->N; i++) {	/* printf("  akt_ state %d\n",i);*/      	e_index = get_emission_index(mo, i, O[t], t);	if (e_index != -1){	  alpha_curr_col[i] = ghmm_dmodel_forward_step(&mo->s[i], alpha_last_col,					       mo->s[i].b[e_index]);	  scale += alpha_curr_col[i];	}	else	  alpha_curr_col[i] = 0;      }          if (scale < GHMM_EPS_PREC) {	GHMM_LOG(LCONVERTED, "scale kleiner als eps\n");	/* O-string  can't be generated by hmm */	*log_p = +1.0;	break;      }      c_t = 1/scale;      for (i=0; i<mo->N; i++)	alpha_curr_col[i] *= c_t;          /*sum log(c[t]) scaling values to get  log( P(O|lambda) ) */      *log_p -= log(c_t);      scalingf = 1/old_scale;      for (m=0; m<mo->N; m++) {	for (i=0; i<mo->N; i++) {	  /* computes estimates for the numerator of transition probabilities */	  for (j=0; j<mo->s[i].out_states; j++) {	    j_id = mo->s[i].out_id[j];	    for (g=0; g<mo->s[j_id].in_states; g++) {	      g_id = mo->s[j_id].out_id[g];	      e_index = get_emission_index(mo, g_id, O[t], t);	       /* scales all summands with the current */	      summands[g] = last_est[m]->a_num[i][j] * mo->s[j_id].in_a[g]		* mo->s[g_id].b[e_index] * scalingf;	    }	    if (j_id == m) {	      e_index = get_emission_index(mo, j_id, O[t], t);	      /* alpha is scaled. no other scaling necessary */	      summands[g++] = alpha_last_col[i] * mo->s[i].out_a[j]		* mo->s[j_id].b[e_index];	    }	    curr_est[m]->a_num[i][j] = nologSum(summands, g);	  }#ifdef calculate_denominators_extra	  /* computes denominator of transition probabilities */	  	  for (g=0; g<mo->s[m].in_states; g++) {	    g_id = mo->s[m].in_id[g];	    e_index = get_emission_index(mo, m, O[t], t);	    /* scales all summands with the current factor */	    summands[g] = last_est[m]->a_denom[i] * mo->s[m].in_a[g]	      * mo->s[m].b[e_index] * scalingf;	  }	  if (i == m) {	    for (l=0; l<mo->s[i].out_states; l++)	      if (mo->s[i].out_id[l] == i)		break;	    if (l<mo->s[i].out_states) {	      e_index = get_emission_index(mo, i, O[t], t);	      /* alpha is scaled. no other scaling necessary */	      summands[++g] = alpha_last_col[i] 		* mo->s[i].out_a[l] * mo->s[m].b[e_index];	    }	  }	  curr_est[m]->a_denom[i] = nologSum(summands, g);#endif	  /* computes estimates for the numerator of emmission probabilities*/	  if (mo->model_type & GHMM_kHigherOrderEmissions)	    size = ghmm_ipow(mo,mo->M, mo->order[i]);	  else	    size = 1;	  for (h=0; h<size; h++)	    for (s=h*mo->M; s<h*mo->M+mo->M; s++) {	      for (g=0; g<mo->s[m].in_states; g++) {		g_id = mo->s[m].out_id[g];		e_index = get_emission_index(mo, g_id, O[t], t);		/* scales all summands with the last scaling factor		   of alpha */		summands[g] = last_est[m]->b_num[i][s] * mo->s[m].in_a[g]		  * mo->s[g_id].b[e_index] * scalingf;	      }	      curr_est[m]->b_num[i][s] = nologSum(summands, g);	    }	  e_index = get_emission_index(mo, m, O[t], t);	  if (i == m) {	    for (l=0; l<mo->s[m].out_states; l++)	      if (mo->s[m].out_id[l] == m)		break;	    if (l<mo->s[m].out_states)	      /* alpha is scaled. no other scaling necessary */	      curr_est[m]->b_num[i][e_index] += alpha_last_col[i]		* mo->s[m].out_a[l] * mo->s[m].b[e_index];	  }	  	}      }      /* switching pointers of alpha_curr_col and alpha_last_col */      switching_tmp  = alpha_last_col;      alpha_last_col = alpha_curr_col;      alpha_curr_col = switching_tmp;      switch_lst = last_est;      last_est   = curr_est;      curr_est   = switch_lst;    }    /* filling the usual reestimate arrays by summing all states */    for (m=0; m<mo->N; m++) {      curr_est[m]->pi_denom = 0;      for (i=0; i<mo->N; i++) {	/* PI */	/* XXX calculate the estimates for pi numerator */	curr_est[m]->pi_num[i] = mo->s[i].pi;	r->pi_num[i] += seq_w[k] * curr_est[m]->pi_num[i];	curr_est[m]->pi_denom += curr_est[m]->pi_num[i];	/* A */	curr_est[m]->a_denom[i] = 0;	for (j=0; j<mo->s[i].out_states; j++) {	  r->a_num[i][j] += seq_w[k] * curr_est[m]->a_num[i][j];	  curr_est[m]->a_denom[i] += curr_est[m]->a_num[i][j];	}	r->a_denom[i] += seq_w[k] * curr_est[m]->a_denom[i];		/* B */	for (h=0; h<size; h++) {	  curr_est[m]->b_denom[i][h] = 0;	  for (s=h*mo->M; s<h*mo->M+mo->M; s++) {	    r->b_num[i][s] += seq_w[k] * curr_est[m]->b_num[i][s];	    curr_est[m]->b_denom[i][h] += curr_est[m]->b_num[i][s];	  }	  r->b_denom[i][h] += seq_w[k] * curr_est[m]->b_denom[i][h];	}      }      /* PI */      r->pi_denom += seq_w[k] * curr_est[m]->pi_denom;    }      }  if (*log_p == 1.0)    res = -1;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -