📄 reestimate.c
字号:
/********************************************************************************* This file is part of the General Hidden Markov Model Library,* GHMM version 0.8_beta1, see http://ghmm.org** Filename: ghmm/ghmm/reestimate.c* Authors: Bernhard Knab, Benjamin Georgi, Janne Grunau** Copyright (C) 1998-2004 Alexander Schliep* Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln* Copyright (C) 2002-2004 Max-Planck-Institut fuer Molekulare Genetik,* Berlin** Contact: schliep@ghmm.org** This library is free software; you can redistribute it and/or* modify it under the terms of the GNU Library General Public* License as published by the Free Software Foundation; either* version 2 of the License, or (at your option) any later version.** This library is distributed in the hope that it will be useful,* but WITHOUT ANY WARRANTY; without even the implied warranty of* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU* Library General Public License for more details.** You should have received a copy of the GNU Library General Public* License along with this library; if not, write to the Free* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*** This file is version $Revision: 1713 $* from $Date: 2006-10-16 16:06:28 +0200 (Mon, 16 Oct 2006) $* last change by $Author: grunau $.********************************************************************************/#ifdef HAVE_CONFIG_H# include "../config.h"#endif#include <float.h>#include <math.h>#include "ghmm.h"#include "mes.h"#include "mprintf.h"#include "reestimate.h"#include "matrix.h"#include "model.h"#include "foba.h"#include "ghmm_internals.h"typedef struct local_store_t { double *pi_num; double pi_denom; double **a_num; double *a_denom; double **b_num; double **b_denom;} local_store_t;static local_store_t *reestimate_alloc (const ghmm_dmodel * mo);static int reestimate_free (local_store_t ** r, int N);static int reestimate_init (local_store_t * r, const ghmm_dmodel * mo);static int reestimate_setlambda (local_store_t * r, ghmm_dmodel * mo);static int reestimate_one_step (ghmm_dmodel * mo, local_store_t * r, int seq_number, int *seq_length, int **O, double *log_p, double *seq_w);static double nologSum(double* vec, int len) { int i; double retval=vec[0]; for (i=1; i<len; i++) retval += vec[i]; return retval;}/*----------------------------------------------------------------------------*/static local_store_t *reestimate_alloc (const ghmm_dmodel * mo){# define CUR_PROC "reestimate_alloc" int i; local_store_t *r = NULL; ARRAY_CALLOC(r, 1); ARRAY_CALLOC(r->pi_num, mo->N); ARRAY_CALLOC(r->a_num, mo->N); for (i=0; i<mo->N; i++) ARRAY_CALLOC(r->a_num[i], mo->s[i].out_states); ARRAY_CALLOC(r->a_denom, mo->N); ARRAY_MALLOC(r->b_num, mo->N); ARRAY_MALLOC(r->b_denom, mo->N); if (mo->model_type & GHMM_kHigherOrderEmissions) for (i=0; i<mo->N; i++) { ARRAY_CALLOC(r->b_num[i], ghmm_ipow(mo, mo->M, mo->order[i] + 1)); ARRAY_CALLOC(r->b_denom[i], ghmm_ipow (mo, mo->M, mo->order[i])); } else for (i=0; i<mo->N; i++) { ARRAY_CALLOC(r->b_num[i], mo->M); ARRAY_CALLOC(r->b_denom[i], 1); } return (r);STOP: /* Label STOP from ARRAY_[CM]ALLOC */ reestimate_free (&r, mo->N); return (NULL);# undef CUR_PROC} /* reestimate_alloc *//*----------------------------------------------------------------------------*/static int reestimate_free (local_store_t ** r, int N){# define CUR_PROC "reestimate_free" int i; mes_check_ptr (r, return (-1)); if (!*r) return (0); m_free ((*r)->pi_num); if ((*r)->a_num){ for (i = 0; i < N; i++){ m_free ((*r)->a_num[i]); } } m_free ((*r)->a_num); m_free ((*r)->a_denom); if ((*r)->b_num){ for (i = 0; i < N; i++){ m_free ((*r)->b_num[i]); } } m_free ((*r)->b_num); if ((*r)->b_denom){ for (i = 0; i < N; i++){ m_free ((*r)->b_denom[i]); } } m_free ((*r)->b_denom); m_free (*r); return (0);# undef CUR_PROC} /* reestimate_free *//*----------------------------------------------------------------------------*/static int reestimate_init(local_store_t * r, const ghmm_dmodel * mo) {# define CUR_PROC "reestimate_init" int i, j, m, size; r->pi_denom = 0.0; for (i=0; i<mo->N; i++) { r->pi_num[i] = 0.0; r->a_denom[i] = 0.0; for (j=0; j<mo->s[i].out_states; j++) r->a_num[i][j] = 0.0; if (mo->model_type & GHMM_kHigherOrderEmissions) { size = ghmm_ipow(mo, mo->M, mo->order[i]); for (m=0; m<size; m++) r->b_denom[i][m] = 0.0; size *= mo->M; for (m=0; m<size; m++) r->b_num[i][m] = 0.0; } else { r->b_denom[i][0] = 0.0; for (m=0; m<mo->M; m++) r->b_num[i][m] = 0.0; } } return (0);# undef CUR_PROC} /* reestimate_init *//*----------------------------------------------------------------------------*/int ighmm_reestimate_alloc_matvek (double ***alpha, double ***beta, double **scale, int T, int N){# define CUR_PROC "ighmm_reestimate_alloc_matvek" int res = -1; *alpha = ighmm_cmatrix_stat_alloc (T, N); if (!(*alpha)) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } *beta = ighmm_cmatrix_stat_alloc (T, N); if (!(*beta)) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } ARRAY_CALLOC (*scale, T); res = 0;STOP: /* Label STOP from ARRAY_[CM]ALLOC */ return (res);# undef CUR_PROC} /* ighmm_reestimate_alloc_matvek *//*----------------------------------------------------------------------------*/int ighmm_reestimate_free_matvek (double **alpha, double **beta, double *scale, int T){# define CUR_PROC "ighmm_reestimate_free_matvek" ighmm_cmatrix_stat_free (&alpha); ighmm_cmatrix_stat_free (&beta); m_free (scale); return (0);# undef CUR_PROC} /* ighmm_reestimate_free_matvek *//*----------------------------------------------------------------------------*/void ghmm_dmodel_update_tie_groups (ghmm_dmodel * mo) {#define CUR_PROC "ghmm_dmodel_update_tied_groups" int i, j, k; int bi_len; int nr=0; double *new_emissions; char * str; /* printf("** Start of ghmm_dmodel_update_tied_groups **\n"); */ /* do nothing if there are no tied emissions */ if (!(mo->model_type & GHMM_kTiedEmissions)) { GHMM_LOG(LWARN, "No tied emissions. Exiting."); return; } if (mo->model_type & GHMM_kHigherOrderEmissions) { ARRAY_MALLOC(new_emissions, ghmm_ipow(mo, mo->M, mo->maxorder+1)); } else { ARRAY_MALLOC(new_emissions, mo->M); } for (i=0; i<mo->N; i++) { /* find tie group leaders */ if (mo->tied_to[i] == i) { if (mo->model_type & GHMM_kHigherOrderEmissions) bi_len = ghmm_ipow(mo, mo->M, mo->order[i] + 1); else bi_len = mo->M; if (mo->model_type & GHMM_kSilentStates && mo->silent[i]) { str = ighmm_mprintf(NULL, 0, "Tie group leader %d is silent.", i); GHMM_LOG(LWARN, str); m_free(str); nr = 0; /* initializing with zeros */ for (k=0; k<bi_len; k++) new_emissions[k] = 0.0; } else { nr = 1; /* initializing with tie group leader emissions */ for (k=0; k<bi_len; k++) new_emissions[k] = mo->s[i].b[k]; } /* finding tie group members */ for (j=i+1; j<mo->N; j++) { if (mo->tied_to[j] == i && (!(mo->model_type & GHMM_kHigherOrderEmissions) || mo->order[i] == mo->order[j])) { /* silent states have no contribution to the pooled emissions within a group */ if (!(mo->model_type & GHMM_kSilentStates) || (mo->silent[j] == 0)) { nr += 1; /* printf(" tie group member %d -> leader %d.\n",j,i); */ /* summing up emissions in the tie group */ for (k=0; k<bi_len; k++) new_emissions[k] += mo->s[j].b[k]; } else { str = ighmm_mprintf(NULL, 0, "Tie group member %d is silent.", j); GHMM_LOG(LWARN, str); m_free(str); } } } /* updating emissions */ if (nr > 1) for (j=i; j < mo->N; j++) { /* states within one tie group are required to have the same order */ if (mo->tied_to[j] == i && (!(mo->model_type & GHMM_kHigherOrderEmissions) || mo->order[i] == mo->order[j]) && (!(mo->model_type & GHMM_kSilentStates) || (mo->silent[j] == 0))) for (k = 0; k < bi_len; k++) { mo->s[j].b[k] = new_emissions[k] / nr; /* printf("s(%d)[%d] -> %f / %f = %f\n", j, k, new_emissions[k], nr,mo->s[j].b[k]); */ } } else { str = ighmm_mprintf(NULL, 0, "The tie group with leader %d has only one non-silent state. Kind of pointless!", i); GHMM_LOG(LINFO, str); m_free(str); } } }STOP: /* Label STOP from ARRAY_[CM]ALLOC */ m_free (new_emissions);#undef CUR_PROC} /* ghmm_dmodel_update_tied_groups *//*----------------------------------------------------------------------------*/static int reestimate_setlambda (local_store_t * r, ghmm_dmodel * mo){# define CUR_PROC "reestimate_setlambda" int res = -1; int h, i, j, m, l, j_id, positive; double factor, p_i; int hist, col, size, reachable; char *str; mes_check_0 (r->pi_denom, goto STOP); for (i = 0; i < mo->N; i++) { reachable = 1; positive = 0; /* Pi */ mo->s[i].pi = r->pi_num[i] / r->pi_denom; /* A */ /* note: denom. might be 0; never reached state? */ p_i = 0.0; if (r->a_denom[i] < GHMM_EPS_PREC) { for (h = 0; h < mo->s[i].in_states; h++) { p_i += mo->s[i].in_a[h]; } if (p_i == 0.0) { if (h == 0) str = ighmm_mprintf(NULL, 0, "State %d can't be reached (no in_states)", i); else str = ighmm_mprintf (NULL, 0, "State %d can't be reached (prob = 0.0)", i); GHMM_LOG(LINFO, str); m_free (str); reachable = 0; } factor = 0.0; } else factor = (1 / r->a_denom[i]); for (j = 0; j < mo->s[i].out_states; j++) { /* TEST: denom. < numerator */ if ((r->a_denom[i] - r->a_num[i][j]) <= -GHMM_EPS_PREC) { GHMM_LOG(LCONVERTED, " numerator > denom!\n"); } mo->s[i].out_a[j] = r->a_num[i][j] * factor; if (r->a_num[i][j] >= GHMM_EPS_PREC) positive = 1; /* important: also update in_a */ l = 0; j_id = mo->s[i].out_id[j]; while (l < mo->s[j_id].in_states) if (mo->s[j_id].in_id[l] == i) break; else l++; if (l == mo->s[j_id].in_states) { GHMM_LOG(LCONVERTED, "no corresponding in_a to out_a found!\n"); goto STOP; } mo->s[j_id].in_a[l] = mo->s[i].out_a[j]; } /*if (!positive) { str = ighmm_mprintf(NULL, 0, "All numerator a[%d][j] == 0 (denom=%.4f, P(in)=%.4f)!\n", i, r->a_denom[i], p_i); GHMM_LOG(LCONVERTED, str); m_free(str); } */ /* if fix, continue to next state */ if (mo->s[i].fix) continue; /* B */ if (mo->model_type & GHMM_kHigherOrderEmissions) size = ghmm_ipow (mo, mo->M, mo->order[i]); else
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -