📄 reestimate.c
字号:
size = 1; /* If all in_a's are zero, the state can't be reached. Set all b's to -1.0 */ if (!reachable) { for (hist = 0; hist < size; hist++) { col = hist * mo->M; for (m = col; m < col + mo->M; m++) { mo->s[i].b[m] = -1.0; } } } else { for (hist = 0; hist < size; hist++) { /* If the denominator is very small, we have not seen many emissions in this state with this history. We are conservative and just skip them. */ if (r->b_denom[i][hist] < GHMM_EPS_PREC) continue; else factor = (1.0 / r->b_denom[i][hist]); positive = 0; /* TEST: denom. < numerator */ col = hist * mo->M; for (m = col; m < col + mo->M; m++) { if ((r->b_denom[i][hist] - r->b_num[i][m]) <= -GHMM_EPS_PREC) { str = ighmm_mprintf (NULL, 0, "numerator b (%.4f) > denom (%.4f)!\n", r->b_num[i][m], r->b_denom[i][hist]); GHMM_LOG(LCONVERTED, str); m_free (str); } mo->s[i].b[m] = r->b_num[i][m] * factor; if (mo->s[i].b[m] >= GHMM_EPS_PREC) positive = 1; } if (!positive) { str = ighmm_mprintf (NULL, 0, "All numerator b[%d][%d-%d] == 0 (denom = %g)!\n", i, col, col + mo->M, r->b_denom[i][hist]); GHMM_LOG(LCONVERTED, str); m_free (str); } } /* for each history */ } } /* for (i = 0 .. < mo->N) */ res = 0; if (mo->model_type & GHMM_kTiedEmissions) ghmm_dmodel_update_tie_groups (mo);STOP: /* Label STOP from ARRAY_[CM]ALLOC */ return (res);# undef CUR_PROC} /* reestimate_setlambda *//*----------------------------------------------------------------------------*/static int reestimate_one_step (ghmm_dmodel * mo, local_store_t * r, int seq_number, int *seq_length, int **O, double *log_p, double *seq_w){# define CUR_PROC "reestimate_one_step" int res = -1; int k, i, j, t, j_id, valid; int e_index; char * str; double **alpha = NULL; double **beta = NULL; double *scale = NULL; int T_k=0; double gamma; double log_p_k; /* first set maxorder to zero if model_type & kHigherOrderEmissions is FALSE TODO XXX use model->maxorder only if model_type & kHigherOrderEmissions is TRUE */ if (!(mo->model_type & GHMM_kHigherOrderEmissions)) mo->maxorder = 0; *log_p = 0.0; valid = 0; /* loop over all sequences */ for (k = 0; k < seq_number; k++) { mo->emission_history = 0; T_k = seq_length[k]; /* current seq. length */ /* initialization of matrices and vector depends on T_k */ if (ighmm_reestimate_alloc_matvek (&alpha, &beta, &scale, T_k, mo->N) == -1) { GHMM_LOG_QUEUED(LCONVERTED); goto FREE; } if (ghmm_dmodel_forward (mo, O[k], T_k, alpha, scale, &log_p_k) == -1) { GHMM_LOG_QUEUED(LCONVERTED); goto FREE; } if (log_p_k != +1) { /* O[k] can be generated */ *log_p += log_p_k; valid = 1; if (ghmm_dmodel_backward (mo, O[k], T_k, beta, scale) == -1) { GHMM_LOG_QUEUED(LCONVERTED); goto FREE; } /* loop over all states */ for (i = 0; i < mo->N; i++) { /* Pi */ r->pi_num[i] += seq_w[k] * alpha[0][i] * beta[0][i]; r->pi_denom += seq_w[k] * alpha[0][i] * beta[0][i]; for (t=0; t < T_k-1; t++) { /* B */ if (!mo->s[i].fix) { e_index = get_emission_index(mo, i, O[k][t], t); if (e_index != -1) { gamma = seq_w[k] * alpha[t][i] * beta[t][i]; r->b_num[i][e_index] += gamma; r->b_denom[i][e_index / (mo->M)] += gamma; } } update_emission_history(mo, O[k][t]); /* A */ r->a_denom[i] += (seq_w[k] * alpha[t][i] * beta[t][i]); for (j=0; j < mo->s[i].out_states; j++) { j_id = mo->s[i].out_id[j]; e_index = get_emission_index(mo, j_id, O[k][t+1], t+1); if (e_index != -1) r->a_num[i][j] += (seq_w[k] * alpha[t][i] * mo->s[i].out_a[j] * mo->s[j_id].b[e_index] * beta[t+1][j_id] * (1.0 / scale[t + 1])); /* c[t] = 1/scale[t] */ } } /* B: last iteration for t==T_k-1 */ t = T_k - 1; if (!mo->s[i].fix) { e_index = get_emission_index (mo, i, O[k][t], t); if (e_index != -1) { gamma = seq_w[k] * alpha[t][i] * beta[t][i]; r->b_num[i][e_index] += gamma; r->b_denom[i][e_index / (mo->M)] += gamma; } } } } /* if (log_p_k != +1) */ else { str = ighmm_mprintf(NULL, 0, "O(%d) can't be built from model mo!\n", k); GHMM_LOG(LWARN, str); m_free(str); } ighmm_reestimate_free_matvek(alpha, beta, scale, T_k); } /* for (k = 0; k < seq_number; k++) */ if (valid) { /* new parameter lambda: set directly in model */ if (reestimate_setlambda(r, mo) == -1) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } /* printf ("---- reestimate: after normalization ----\n"); */ /* printf("Emission:\n"); ghmm_dmodel_B_print(stdout, mo, "\t", " ", "\n"); */ /* only test: */ if (ghmm_dmodel_check(mo) == -1) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } } else { /* NO sequence can be built from model mo! */ *log_p = +1; } res = 0;STOP: return (res);FREE: ighmm_reestimate_free_matvek(alpha, beta, scale, T_k); return (res);# undef CUR_PROC} /* reestimate_one_step *//*---------------------------------------------------------------------------*/static int reestimate_one_step_lean (ghmm_dmodel * mo, local_store_t * r, int seq_number, int *seq_length, int **seqs, double *log_p, double *seq_w){# define CUR_PROC "reestimate_one_step_lean" int res = -1; int i, t, k, e_index, len; int m, j, j_id, g, g_id, l, s, h; int size; int * O; double c_t; double * alpha_last_col=NULL; double * alpha_curr_col=NULL; double * switching_tmp; double scale, old_scale, scalingf; double * summands=NULL; local_store_t * * curr_est=NULL, * * last_est=NULL, * * switch_lst; /* allocating memory for two columns of alpha matrix */ ARRAY_CALLOC (alpha_last_col, mo->N); ARRAY_CALLOC (alpha_curr_col, mo->N); /* allocating 2*N local_store_t */ ARRAY_CALLOC (last_est, mo->N); for (i=0; i<mo->N; i++) last_est[i] = reestimate_alloc(mo); ARRAY_CALLOC (curr_est, mo->N); for (i=0; i<mo->N; i++) curr_est[i] = reestimate_alloc(mo); /* temporary array to hold logarithmized summands for sums over probabilities */ ARRAY_CALLOC (summands, m_max(mo->N,ghmm_ipow(mo,mo->M,mo->maxorder+1))+1); for (k=0; k<seq_number; k++) { len = seq_length[k]; O = seqs[k]; ghmm_dmodel_forward_init(mo, alpha_last_col, O[0], &scale); if (scale < GHMM_EPS_PREC) { /* means: first symbol can't be generated by hmm */ *log_p = +1; goto STOP; } *log_p = - log(1/scale); for (t=1; t<len; t++) { old_scale = scale; scale = 0.0; update_emission_history(mo, O[t-1]); /* iterate over non-silent states */ for (i=0; i<mo->N; i++) { /* printf(" akt_ state %d\n",i);*/ e_index = get_emission_index(mo, i, O[t], t); if (e_index != -1){ alpha_curr_col[i] = ghmm_dmodel_forward_step(&mo->s[i], alpha_last_col, mo->s[i].b[e_index]); scale += alpha_curr_col[i]; } else alpha_curr_col[i] = 0; } if (scale < GHMM_EPS_PREC) { GHMM_LOG(LCONVERTED, "scale kleiner als eps\n"); /* O-string can't be generated by hmm */ *log_p = +1.0; break; } c_t = 1/scale; for (i=0; i<mo->N; i++) alpha_curr_col[i] *= c_t; /*sum log(c[t]) scaling values to get log( P(O|lambda) ) */ *log_p -= log(c_t); scalingf = 1/old_scale; for (m=0; m<mo->N; m++) { for (i=0; i<mo->N; i++) { /* computes estimates for the numerator of transition probabilities */ for (j=0; j<mo->s[i].out_states; j++) { j_id = mo->s[i].out_id[j]; for (g=0; g<mo->s[j_id].in_states; g++) { g_id = mo->s[j_id].out_id[g]; e_index = get_emission_index(mo, g_id, O[t], t); /* scales all summands with the current */ summands[g] = last_est[m]->a_num[i][j] * mo->s[j_id].in_a[g] * mo->s[g_id].b[e_index] * scalingf; } if (j_id == m) { e_index = get_emission_index(mo, j_id, O[t], t); /* alpha is scaled. no other scaling necessary */ summands[g++] = alpha_last_col[i] * mo->s[i].out_a[j] * mo->s[j_id].b[e_index]; } curr_est[m]->a_num[i][j] = nologSum(summands, g); }#ifdef calculate_denominators_extra /* computes denominator of transition probabilities */ for (g=0; g<mo->s[m].in_states; g++) { g_id = mo->s[m].in_id[g]; e_index = get_emission_index(mo, m, O[t], t); /* scales all summands with the current factor */ summands[g] = last_est[m]->a_denom[i] * mo->s[m].in_a[g] * mo->s[m].b[e_index] * scalingf; } if (i == m) { for (l=0; l<mo->s[i].out_states; l++) if (mo->s[i].out_id[l] == i) break; if (l<mo->s[i].out_states) { e_index = get_emission_index(mo, i, O[t], t); /* alpha is scaled. no other scaling necessary */ summands[++g] = alpha_last_col[i] * mo->s[i].out_a[l] * mo->s[m].b[e_index]; } } curr_est[m]->a_denom[i] = nologSum(summands, g);#endif /* computes estimates for the numerator of emmission probabilities*/ if (mo->model_type & GHMM_kHigherOrderEmissions) size = ghmm_ipow(mo,mo->M, mo->order[i]); else size = 1; for (h=0; h<size; h++) for (s=h*mo->M; s<h*mo->M+mo->M; s++) { for (g=0; g<mo->s[m].in_states; g++) { g_id = mo->s[m].out_id[g]; e_index = get_emission_index(mo, g_id, O[t], t); /* scales all summands with the last scaling factor of alpha */ summands[g] = last_est[m]->b_num[i][s] * mo->s[m].in_a[g] * mo->s[g_id].b[e_index] * scalingf; } curr_est[m]->b_num[i][s] = nologSum(summands, g); } e_index = get_emission_index(mo, m, O[t], t); if (i == m) { for (l=0; l<mo->s[m].out_states; l++) if (mo->s[m].out_id[l] == m) break; if (l<mo->s[m].out_states) /* alpha is scaled. no other scaling necessary */ curr_est[m]->b_num[i][e_index] += alpha_last_col[i] * mo->s[m].out_a[l] * mo->s[m].b[e_index]; } } } /* switching pointers of alpha_curr_col and alpha_last_col */ switching_tmp = alpha_last_col; alpha_last_col = alpha_curr_col; alpha_curr_col = switching_tmp; switch_lst = last_est; last_est = curr_est; curr_est = switch_lst; } /* filling the usual reestimate arrays by summing all states */ for (m=0; m<mo->N; m++) { curr_est[m]->pi_denom = 0; for (i=0; i<mo->N; i++) { /* PI */ /* XXX calculate the estimates for pi numerator */ curr_est[m]->pi_num[i] = mo->s[i].pi; r->pi_num[i] += seq_w[k] * curr_est[m]->pi_num[i]; curr_est[m]->pi_denom += curr_est[m]->pi_num[i]; /* A */ curr_est[m]->a_denom[i] = 0; for (j=0; j<mo->s[i].out_states; j++) { r->a_num[i][j] += seq_w[k] * curr_est[m]->a_num[i][j]; curr_est[m]->a_denom[i] += curr_est[m]->a_num[i][j]; } r->a_denom[i] += seq_w[k] * curr_est[m]->a_denom[i]; /* B */ for (h=0; h<size; h++) { curr_est[m]->b_denom[i][h] = 0; for (s=h*mo->M; s<h*mo->M+mo->M; s++) { r->b_num[i][s] += seq_w[k] * curr_est[m]->b_num[i][s]; curr_est[m]->b_denom[i][h] += curr_est[m]->b_num[i][s]; } r->b_denom[i][h] += seq_w[k] * curr_est[m]->b_denom[i][h]; } } /* PI */ r->pi_denom += seq_w[k] * curr_est[m]->pi_denom; } } if (*log_p == 1.0) res = -1;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -