📄 reestimate.c
字号:
else res = 0;STOP: /* Label STOP from ARRAY_[CM]ALLOC */ /* deallocation */ if (last_est) for (i=0; i<mo->N; i++) reestimate_free(&(last_est[i]), mo->N); m_free(last_est); if (curr_est) for (i=0; i<mo->N; i++) reestimate_free(&(curr_est[i]), mo->N); m_free(curr_est); m_free(alpha_last_col); m_free(alpha_curr_col); m_free(summands); return res; #undef CUR_PROC}/*============================================================================*/int ghmm_dmodel_baum_welch (ghmm_dmodel * mo, ghmm_dseq * sq){# define CUR_PROC "ghmm_dmodel_baum_welch" return ghmm_dmodel_baum_welch_nstep (mo, sq, GHMM_MAX_ITER_BW, GHMM_EPS_ITER_BW);# undef CUR_PROC} /* ghmm_dmodel_baum_welch *//*============================================================================*/int ghmm_dmodel_baum_welch_nstep (ghmm_dmodel * mo, ghmm_dseq * sq, int max_step, double likelihood_delta){# define CUR_PROC "ghmm_dmodel_baum_welch" int n, k, valid; double log_p, log_p_old, log_p_k, diff; local_store_t *r = NULL; int res = -1; char *str; /* local store for all iterations */ r = reestimate_alloc (mo); if (!r) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; }; log_p_old = -DBL_MAX; n = 1; /* main loop Baum-Welch-Alg. */ while (n <= max_step) { if (1) { if (reestimate_one_step(mo, r, sq->seq_number, sq->seq_len, sq->seq, &log_p, sq->seq_w) == -1) { str = ighmm_mprintf (NULL, 0, "reestimate_one_step false (%d.step)\n", n); GHMM_LOG(LCONVERTED, str); m_free (str); goto STOP; } } else { if (reestimate_one_step_lean(mo, r, sq->seq_number, sq->seq_len, sq->seq, &log_p, sq->seq_w) == -1) { str = ighmm_mprintf (NULL, 0, "reestimate_one_step false (%d.step)\n", n); GHMM_LOG(LCONVERTED, str); m_free (str); goto STOP; } } /*if (n == 1)*/ /*printf("%8.5f (-log_p input model)\n", -log_p); */ /*else*/ /*printf("%8.5f (-log_p)\n", -log_p); */ if (log_p == +1) { printf ("Reestimate stopped: No sequence can be built from model mo!\n"); break; } diff = log_p - log_p_old; /* error in convergence ? */ if (diff < -GHMM_EPS_PREC) { str = ighmm_mprintf (NULL, 0, "No convergence: log P < log P-old! (n=%d)\n", n); GHMM_LOG(LCONVERTED, str); m_free (str); goto STOP; } else if (log_p > GHMM_EPS_PREC) { str = ighmm_mprintf (NULL, 0, "No convergence: log P > 0! (n=%d)\n", n); GHMM_LOG(LCONVERTED, str); m_free (str); goto STOP; } /* stop iterations? */ if (diff < fabs ((double) likelihood_delta * log_p)) { printf("Convergence after %d steps\n", n); break; } else { /* for next iteration */ log_p_old = log_p; reestimate_init (r, mo); /* sets all fields to zero */ n++; } } /* while (n <= MAX_ITER) */ /* log_p of reestimated model */ log_p = 0.0; valid = 0; for (k = 0; k < sq->seq_number; k++) { if (ghmm_dmodel_logp (mo, sq->seq[k], sq->seq_len[k], &log_p_k) == -1) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } if (log_p_k != +1) { log_p += log_p_k; valid = 1; } } if (!valid) log_p = +1; /*printf("%8.5f (-log_p optimized model)\n", -log_p);*/ /* check new parameter for plausibility */ /* if (ghmm_dmodel_check(mo) == -1) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } */ res = 0;STOP: /* Label STOP from ARRAY_[CM]ALLOC */ reestimate_free (&r, mo->N); return res;# undef CUR_PROC} /* ghmm_dmodel_baum_welch_nstep *//**============================ Labeled HMMs ================================**//*----------------------------------------------------------------------------*/static int reestimate_one_step_label (ghmm_dmodel * mo, local_store_t * r, int seq_number, int *seq_length, int **O, int **label, double *log_p, double *seq_w){# define CUR_PROC "reestimate_one_step_label" int k, i, j, t, j_id; int e_index, T_k, valid = 0; double **alpha = NULL; double **beta = NULL; double *scale = NULL; double gamma; double log_p_k; char *str; /* first set maxorder to zero if model_type & kHigherOrderEmissions is FALSE TODO XXX use model->maxorder only if model_type & GHMM_kHigherOrderEmissions is TRUE */ if (!(mo->model_type & GHMM_kHigherOrderEmissions)) mo->maxorder = 0; *log_p = 0.0; /* loop over all sequences */ for (k = 0; k < seq_number; k++) { mo->emission_history = 0; T_k = seq_length[k]; /* current seq. length */ /* initialization of matrices and vector depends on T_k */ if (ighmm_reestimate_alloc_matvek (&alpha, &beta, &scale, T_k, mo->N) == -1) { GHMM_LOG_QUEUED(LCONVERTED); goto FREE; } if (ghmm_dmodel_label_forward (mo, O[k], label[k], T_k, alpha, scale, &log_p_k) == -1) { GHMM_LOG_QUEUED(LCONVERTED); goto FREE; } if (log_p_k != +1) { /* O[k] can be generated */ *log_p += log_p_k; valid = 1; if (ghmm_dmodel_label_backward (mo, O[k], label[k], T_k, beta, scale, &log_p_k) == -1) { GHMM_LOG_QUEUED(LCONVERTED); goto FREE; } /* loop over all states */ for (i = 0; i < mo->N; i++) { /* Pi */ r->pi_num[i] += seq_w[k] * alpha[0][i] * beta[0][i]; r->pi_denom += seq_w[k] * alpha[0][i] * beta[0][i]; for (t = 0; t < T_k - 1; t++) { /* B */ if (!(mo->s[i].fix) && (mo->label[i] == label[k][t])) { e_index = get_emission_index (mo, i, O[k][t], t); if (e_index != -1) { gamma = seq_w[k] * alpha[t][i] * beta[t][i]; r->b_num[i][e_index] += gamma; r->b_denom[i][e_index / (mo->M)] += gamma; } } update_emission_history (mo, O[k][t]); /* A */ r->a_denom[i] += seq_w[k] * alpha[t][i] * beta[t][i]; for (j = 0; j < mo->s[i].out_states; j++) { j_id = mo->s[i].out_id[j]; if (label[k][t + 1] != mo->label[j_id]) continue; e_index = get_emission_index (mo, j_id, O[k][t + 1], t + 1); if (e_index != -1) r->a_num[i][j] += (seq_w[k] * alpha[t][i] * mo->s[i].out_a[j] * mo->s[j_id].b[e_index] * beta[t + 1][j_id] * (1.0 / scale[t + 1])); } } /* B: last iteration for t==T_k-1 */ t = T_k - 1; if (!(mo->s[i].fix) && (mo->label[i] == label[k][t])) { e_index = get_emission_index (mo, i, O[k][t], t); if (e_index != -1) { gamma = seq_w[k] * alpha[t][i] * beta[t][i]; r->b_num[i][e_index] += gamma; r->b_denom[i][e_index / (mo->M)] += gamma; } } } } else { str = ighmm_mprintf (NULL, 0, "warning: sequence %d can't be built from model\n", k); GHMM_LOG(LCONVERTED, str); m_free (str); } ighmm_reestimate_free_matvek (alpha, beta, scale, T_k); } /* for (k = 0; k < seq_number; k++) */ if (valid) { /* new parameter lambda: set directly in model */ if (reestimate_setlambda (r, mo) == -1) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } /* printf ("---- reestimate: after normalization ----\n"); */ /* printf("Emission:\n"); */ /* ghmm_dmodel_B_print(stdout, mo, "\t", " ", "\n"); */ /* only test: */ /* if (ghmm_dmodel_check(mo) == -1) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } */ } else { /* NO sequence can be built from model mo! */ *log_p = +1; } return (0);FREE: ighmm_reestimate_free_matvek (alpha, beta, scale, T_k);STOP: /* Label STOP from ARRAY_[CM]ALLOC */ return (-1);# undef CUR_PROC} /* reestimate_one_step_label *//*============================================================================*/int ghmm_dmodel_label_baum_welch (ghmm_dmodel * mo, ghmm_dseq * sq){# define CUR_PROC "ghmm_dl_baum_welch" return ghmm_dmodel_label_baum_welch_nstep (mo, sq, GHMM_MAX_ITER_BW, GHMM_EPS_ITER_BW);# undef CUR_PROC} /* ghmm_dmodel_baum_welch *//*============================================================================*/int ghmm_dmodel_label_baum_welch_nstep (ghmm_dmodel * mo, ghmm_dseq * sq, int max_step, double likelihood_delta){# define CUR_PROC "ghmm_dl_baum_welch" int n, k, valid; double log_p, log_p_old, log_p_k, diff; local_store_t *r = NULL; int res = -1; char *str; /* local store for all iterations */ r = reestimate_alloc (mo); if (!r) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; }; log_p_old = -DBL_MAX; n = 1; /* main loop Baum-Welch-Alg. */ while (n <= max_step) { if (reestimate_one_step_label (mo, r, sq->seq_number, sq->seq_len, sq->seq, sq->state_labels, &log_p, sq->seq_w) == -1) { str = ighmm_mprintf (NULL, 0, "reestimate_one_step_label false (%d.step)\n", n); GHMM_LOG(LCONVERTED, str); m_free (str); goto STOP; } if (n == 1) printf ("%8.5f (-log_p input model)\n", -log_p); else printf ("%8.5f (-log_p)\n", -log_p); if (log_p == +1) { printf ("Reestimate stopped: No sequence can be built from model mo!\n"); break; } diff = log_p - log_p_old; /* error in convergence ? */ if (diff < -GHMM_EPS_PREC) { str = ighmm_mprintf (NULL, 0, "No convergence: log P < log P-old! (n = %d)\n", n); GHMM_LOG(LCONVERTED, str); m_free (str); goto STOP; } else if (log_p > GHMM_EPS_PREC) { str = ighmm_mprintf (NULL, 0, "No convergence: log P > 0! (n = %d)\n", n); GHMM_LOG(LCONVERTED, str); m_free (str); goto STOP; } /* stop iterations? */ if (diff < fabs ((double) likelihood_delta * log_p)) { printf ("Convergence after %d steps\n", n); break; } else { /* for next iteration */ log_p_old = log_p; reestimate_init (r, mo); /* sets all fields to zero */ n++; } } /* while (n <= MAX_ITER) */ /* log_p of reestimated model */ log_p = 0.0; valid = 0; for (k = 0; k < sq->seq_number; k++) { if (ghmm_dmodel_label_logp (mo, sq->seq[k], sq->state_labels[k], sq->seq_len[k], &log_p_k) == -1) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } if (log_p_k != +1) { log_p += log_p_k; valid = 1; } } if (!valid) log_p = +1; printf ("%8.5f (-log_p optimized model)\n", -log_p); /* check new parameter for plausibility */ /* if (ghmm_dmodel_check(mo) == -1) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } */ res = 0;STOP: /* Label STOP from ARRAY_[CM]ALLOC */ reestimate_free (&r, mo->N); return res;# undef CUR_PROC} /* ghmm_dl_baum_welch_nstep */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -