pviterbi.c
来自「General Hidden Markov Model Library 一个通用」· C语言 代码 · 共 849 行 · 第 1/3 页
C
849 行
/********************************************************************************* This file is part of the General Hidden Markov Model Library,* GHMM version 0.8_beta1, see http://ghmm.org** Filename: ghmm/ghmm/viterbi.c* Authors: Wasinee Rungsarityotin, Benjamin Georgi** Copyright (C) 1998-2004 Alexander Schliep* Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln* Copyright (C) 2002-2004 Max-Planck-Institut fuer Molekulare Genetik,* Berlin** Contact: schliep@ghmm.org** This library is free software; you can redistribute it and/or* modify it under the terms of the GNU Library General Public* License as published by the Free Software Foundation; either* version 2 of the License, or (at your option) any later version.** This library is distributed in the hope that it will be useful,* but WITHOUT ANY WARRANTY; without even the implied warranty of* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU* Library General Public License for more details.** You should have received a copy of the GNU Library General Public* License along with this library; if not, write to the Free* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*** This file is version $Revision: 1713 $* from $Date: 2006-10-16 16:06:28 +0200 (Mon, 16 Oct 2006) $* last change by $Author: grunau $.********************************************************************************//* Possible sources of errors: initialisation, pushback (the loop after) */#ifdef HAVE_CONFIG_H# include "../config.h"#endif#include <float.h>#include <math.h>#include <assert.h>#include <stdlib.h>#include "ghmm.h"#include "mprintf.h"#include "mes.h"#include "matrix.h"#include "pmodel.h"#include "psequence.h"#include "pviterbi.h"#include "ghmm_internals.h"typedef struct plocal_store_t { /** precomputed log probabilities for transitions into the states for each transition class **/ double *** log_in_a; /** precomputed log probabilities for each state for the emissions **/ double ** log_b; /** lookback matrix for the last offset steps **/ double *** phi; /** log probabilities for the current u, v and every state **/ double *phi_new; /** traceback matrix **/ int ***psi; /** for convinience store a pointer to the model **/ ghmm_dpmodel * mo; /** for the debug mode store information of matrix sizes **/ /** length of sequence X determines size of psi **/ int len_x; /** length of sequence Y determines size of phi and psi **/ int len_y; /** non functional stuff **/ int *topo_order; int topo_order_length;} plocal_store_t; static void ghmm_dpmodel_print_viterbi_store(plocal_store_t * pv); static plocal_store_t *pviterbi_alloc(ghmm_dpmodel *mo, int len_x, int len_y); static int pviterbi_free(plocal_store_t **v, int n, int len_x, int len_y, int max_offset_x, int max_offset_y); static void init_phi(plocal_store_t * pv, ghmm_dpseq * X, ghmm_dpseq * Y); static double get_phi(plocal_store_t * pv, int x, int y, int offset_x, int offset_y, int state); static void set_phi(plocal_store_t * pv, int x, int y, int ghmm_dstate, double prob); static void push_back_phi(plocal_store_t * pv, int length_y); static void set_psi(plocal_store_t * pv, int x, int y, int ghmm_dstate, int from_state);/*============================================================================*/static plocal_store_t *pviterbi_alloc(ghmm_dpmodel *mo, int len_x, int len_y) {#define CUR_PROC "pviterbi_alloc" plocal_store_t* v = NULL; int i, j; ARRAY_CALLOC (v, 1); v->mo = mo; v->len_y = len_y; v->len_x = len_x; /* Allocate the log_in_a's -> individal lenghts */ ARRAY_CALLOC (v->log_in_a, mo->N); /* first index of log_in_a: target state */ for (j = 0; j < mo->N; j++){ /* second index: source state */ ARRAY_CALLOC (v->log_in_a[j], mo->s[j].in_states); for (i=0; i<mo->s[j].in_states; i++) { /* third index: transition classes of source state */ ARRAY_CALLOC (v->log_in_a[j][i], mo->s[mo->s[j].in_id[i]].kclasses); } } ARRAY_CALLOC (v->log_b, mo->N); for (j=0; j<mo->N; j++) { ARRAY_CALLOC (v->log_b[j], ghmm_dpmodel_emission_table_size(mo, j) + 1); } if (!(v->log_b)) {GHMM_LOG_QUEUED(LCONVERTED); goto STOP;} v->phi = ighmm_cmatrix_3d_alloc(mo->max_offset_x + 1, len_y + mo->max_offset_y + 1, mo->N); if (!(v->phi)) {GHMM_LOG_QUEUED(LCONVERTED); goto STOP;} ARRAY_CALLOC (v->phi_new, mo->N); v->psi = ighmm_dmatrix_3d_alloc(len_x + mo->max_offset_x + 1, len_y + mo->max_offset_y + 1, mo->N); if (!(v->psi)) {GHMM_LOG_QUEUED(LCONVERTED); goto STOP;} v->topo_order_length = 0; ARRAY_CALLOC (v->topo_order, mo->N); return(v);STOP: /* Label STOP from ARRAY_[CM]ALLOC */ pviterbi_free((&v), mo->N, len_x, len_y, mo->max_offset_x, mo->max_offset_y); return(NULL);#undef CUR_PROC} /* viterbi_alloc *//*============================================================================*/static int pviterbi_free(plocal_store_t **v, int n, int len_x, int len_y, int max_offset_x, int max_offset_y) {#define CUR_PROC "pviterbi_free" int i, j; mes_check_ptr(v, return(-1)); if( !*v ) return(0); for (j = 0; j < n; j++) { for (i=0; i < (*v)->mo->s[j].in_states; i++) m_free((*v)->log_in_a[j][i]); m_free((*v)->log_in_a[j]); } m_free((*v)->log_in_a); for (j=0; j<n; j++) m_free((*v)->log_b[j]); m_free((*v)->log_b); ighmm_cmatrix_3d_free( &((*v)->phi), max_offset_x + 1, len_y + max_offset_y + 1); m_free((*v)->phi_new); ighmm_dmatrix_3d_free( &((*v)->psi), len_x + max_offset_x + 1, len_y + max_offset_y + 1); m_free((*v)->topo_order); (*v)->mo = NULL; m_free(*v); return(0);#undef CUR_PROC} /* viterbi_free *//*============================================================================*/static void ghmm_dpmodel_print_viterbi_store(plocal_store_t * pv) { int j, k; ghmm_dpmodel * mo; printf("Local store for pair HMM viterbi algorithm\n"); printf("Log in a:\n"); mo = pv->mo; for (j = 0; j < mo->N; j++){ printf("state %i in states %i\n", j, mo->s[j].in_states); for (k=0; k<mo->s[j].in_states; k++) printf("FIXME: log_in_a has three dimensions!"/*From: %i %f\n", mo->s[j].in_id[k], pv->log_in_a[j][k]*/); } printf("Log b:\n"); for (j = 0; j < mo->N; j++){ printf("state %i #chars: %i\n", j, ghmm_dpmodel_emission_table_size(mo, j)); for (k=0; k<ghmm_dpmodel_emission_table_size(mo, j); k++) printf("Emission prob char: %i %f\n", k, pv->log_b[j][k]); } }/*============================================================================*/static void pviterbi_precompute( ghmm_dpmodel *mo, plocal_store_t *v) {#define CUR_PROC "pviterbi_precompute" int i, j, emission, t_class; /* Precomputing the log(a_ij) */ for (j = 0; j < mo->N; j++) for (i = 0; i < mo->s[j].in_states; i++) for (t_class = 0; t_class < mo->s[mo->s[j].in_id[i]].kclasses; t_class++){ if ( mo->s[j].in_a[i][t_class] == 0.0 ) /* DBL_EPSILON ? */ v->log_in_a[j][i][t_class] = +1; /*Not used any further in the calculations */ else v->log_in_a[j][i][t_class] = log( mo->s[j].in_a[i][t_class] ); } /* Precomputing the log emission probabilities for each state*/ for (j = 0; j < mo->N; j++) { for (emission = 0; emission < ghmm_dpmodel_emission_table_size(mo,j); emission++) { if (1) { if ( mo->s[j].b[emission] == 0.0 ) /* DBL_EPSILON ? */ v->log_b[j][emission] = +1; else v->log_b[j][emission] = log( mo->s[j].b[emission] ); } else{ v->log_b[j][emission] = 0.0; } } v->log_b[j][emission] = 1; /* the last field is for invalid emissions */ }#undef CUR_PROC}/* viterbi_precompute *//*============================================================================*//** *//* static void p__viterbi_silent( ghmm_dmodel *mo, int t, plocal_store_t *v ) *//* { *//* int topocount; *//* int i, k; *//* double max_value, value; *//* for ( topocount = 0; topocount < mo->topo_order_length; topocount++) { *//* k = mo->topo_order[topocount]; *//* if ( mo->silent[k] ) { /\* Silent states *\/ *//* /\* Determine the maximum *\/ *//* /\* max_phi = phi[i] + log_in_a[j][i] ... *\/ *//* max_value = -DBL_MAX; *//* v->psi[t][k] = -1; *//* for (i = 0; i < mo->s[k].in_states; i++) { */ /* if ( v->phi[ mo->s[k].in_id[i] ] != +1 && *//* v->log_in_a[k][i] != +1) { *//* value = v->phi[ mo->s[k].in_id[i] ] + v->log_in_a[k][i]; *//* if (value > max_value) { *//* max_value = value; *//* v->psi[t][k] = mo->s[k].in_id[i]; *//* } *//* } *//* } */ /* /\* No maximum found (that is, state never reached) *//* or the output O[t] = 0.0: *\/ *//* if (max_value == -DBL_MAX) { *//* v->phi[k] = +1; *//* } else { *//* v->phi[k] = max_value; *//* } */ /* } *//* } *//* } *//*============================================================================*/static double sget_log_in_a(plocal_store_t * pv, int i, int j, ghmm_dpseq * X, ghmm_dpseq * Y, int index_x, int index_y) { /* determine the transition class for the source ghmm_dstate */ int id = pv->mo->s[i].in_id[j]; int cl = pv->mo->s[id].class_change->get_class(pv->mo, X, Y, index_x,index_y, pv->mo->s[id].class_change->user_data); return pv->log_in_a[i][j][cl];}/*============================================================================*/static double log_b(plocal_store_t * pv, int state, int emission) {#ifdef DEBUG if (ghmm_dstate > pv->mo->N) fprintf(stderr, "log_b: State index out of bounds %i > %i\n", ghmm_dstate, pv->mo->N); if (emission > ghmm_dpmodel_emission_table_size(pv->mo, state)) fprintf(stderr, "log_b: Emission index out of bounds %i > %i for ghmm_dstate %i\n", emission, ghmm_dpmodel_emission_table_size(pv->mo, state), state);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?