pviterbi.c

来自「General Hidden Markov Model Library 一个通用」· C语言 代码 · 共 849 行 · 第 1/3 页

C
849
字号
/*********************************************************************************       This file is part of the General Hidden Markov Model Library,*       GHMM version 0.8_beta1, see http://ghmm.org**       Filename: ghmm/ghmm/viterbi.c*       Authors:  Wasinee Rungsarityotin, Benjamin Georgi**       Copyright (C) 1998-2004 Alexander Schliep*       Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln*       Copyright (C) 2002-2004 Max-Planck-Institut fuer Molekulare Genetik,*                               Berlin**       Contact: schliep@ghmm.org**       This library is free software; you can redistribute it and/or*       modify it under the terms of the GNU Library General Public*       License as published by the Free Software Foundation; either*       version 2 of the License, or (at your option) any later version.**       This library is distributed in the hope that it will be useful,*       but WITHOUT ANY WARRANTY; without even the implied warranty of*       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU*       Library General Public License for more details.**       You should have received a copy of the GNU Library General Public*       License along with this library; if not, write to the Free*       Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA***       This file is version $Revision: 1713 $*                       from $Date: 2006-10-16 16:06:28 +0200 (Mon, 16 Oct 2006) $*             last change by $Author: grunau $.********************************************************************************//* Possible sources of errors: initialisation, pushback (the loop after) */#ifdef HAVE_CONFIG_H#  include "../config.h"#endif#include <float.h>#include <math.h>#include <assert.h>#include <stdlib.h>#include "ghmm.h"#include "mprintf.h"#include "mes.h"#include "matrix.h"#include "pmodel.h"#include "psequence.h"#include "pviterbi.h"#include "ghmm_internals.h"typedef struct plocal_store_t {  /** precomputed log probabilities for transitions into the states       for each transition class **/  double *** log_in_a;  /** precomputed log probabilities for each state for the emissions  **/  double ** log_b;  /** lookback matrix for the last offset steps **/  double *** phi;  /** log probabilities for the current u, v and every state **/  double *phi_new;  /** traceback matrix **/  int ***psi;  /** for convinience store a pointer to the model **/  ghmm_dpmodel * mo;  /** for the debug mode store information of matrix sizes **/  /** length of sequence X determines size of psi **/  int len_x;  /** length of sequence Y determines size of phi and psi **/  int len_y;  /** non functional stuff **/  int    *topo_order;  int    topo_order_length;} plocal_store_t;  static void ghmm_dpmodel_print_viterbi_store(plocal_store_t * pv);  static plocal_store_t *pviterbi_alloc(ghmm_dpmodel *mo, int len_x, int len_y);  static int pviterbi_free(plocal_store_t **v, int n, int len_x, int len_y,			   int max_offset_x, int max_offset_y);  static void init_phi(plocal_store_t * pv, ghmm_dpseq * X, ghmm_dpseq * Y);  static double get_phi(plocal_store_t * pv, int x, int y, int offset_x,			int offset_y, int state);  static void set_phi(plocal_store_t * pv, int x, int y, int ghmm_dstate,		      double prob);  static void push_back_phi(plocal_store_t * pv, int length_y);  static void set_psi(plocal_store_t * pv, int x, int y, int ghmm_dstate,		      int from_state);/*============================================================================*/static plocal_store_t *pviterbi_alloc(ghmm_dpmodel *mo, int len_x, int len_y) {#define CUR_PROC "pviterbi_alloc"  plocal_store_t* v = NULL;  int i, j;  ARRAY_CALLOC (v, 1);  v->mo = mo;  v->len_y = len_y;  v->len_x = len_x;  /* Allocate the log_in_a's -> individal lenghts */  ARRAY_CALLOC (v->log_in_a, mo->N);  /* first index of log_in_a: target state */  for (j = 0; j < mo->N; j++){     /* second index: source state */    ARRAY_CALLOC (v->log_in_a[j], mo->s[j].in_states);    for (i=0; i<mo->s[j].in_states; i++) {      /* third index: transition classes of source state */      ARRAY_CALLOC (v->log_in_a[j][i], mo->s[mo->s[j].in_id[i]].kclasses);    }  }  ARRAY_CALLOC (v->log_b, mo->N);  for (j=0; j<mo->N; j++) {    ARRAY_CALLOC (v->log_b[j], ghmm_dpmodel_emission_table_size(mo, j) + 1);  }  if (!(v->log_b)) {GHMM_LOG_QUEUED(LCONVERTED); goto STOP;}  v->phi = ighmm_cmatrix_3d_alloc(mo->max_offset_x + 1, len_y + mo->max_offset_y + 1, mo->N);  if (!(v->phi)) {GHMM_LOG_QUEUED(LCONVERTED); goto STOP;}  ARRAY_CALLOC (v->phi_new, mo->N);  v->psi = ighmm_dmatrix_3d_alloc(len_x + mo->max_offset_x + 1, len_y + mo->max_offset_y + 1, mo->N);  if (!(v->psi)) {GHMM_LOG_QUEUED(LCONVERTED); goto STOP;}  v->topo_order_length = 0;  ARRAY_CALLOC (v->topo_order, mo->N);  return(v);STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  pviterbi_free((&v), mo->N, len_x, len_y, mo->max_offset_x, mo->max_offset_y);  return(NULL);#undef CUR_PROC} /* viterbi_alloc *//*============================================================================*/static int pviterbi_free(plocal_store_t **v, int n, int len_x, int len_y,			 int max_offset_x, int max_offset_y) {#define CUR_PROC "pviterbi_free"  int i, j;  mes_check_ptr(v, return(-1));  if( !*v ) return(0);  for (j = 0; j < n; j++) {    for (i=0; i < (*v)->mo->s[j].in_states; i++)      m_free((*v)->log_in_a[j][i]);    m_free((*v)->log_in_a[j]);  }      m_free((*v)->log_in_a);  for (j=0; j<n; j++)    m_free((*v)->log_b[j]);  m_free((*v)->log_b);  ighmm_cmatrix_3d_free( &((*v)->phi), max_offset_x + 1, 		   len_y + max_offset_y + 1);  m_free((*v)->phi_new);  ighmm_dmatrix_3d_free( &((*v)->psi), len_x + max_offset_x + 1, len_y + max_offset_y + 1);  m_free((*v)->topo_order);  (*v)->mo = NULL;  m_free(*v);  return(0);#undef CUR_PROC} /* viterbi_free *//*============================================================================*/static void ghmm_dpmodel_print_viterbi_store(plocal_store_t * pv) {  int j, k;  ghmm_dpmodel * mo;  printf("Local store for pair HMM viterbi algorithm\n");  printf("Log in a:\n");  mo = pv->mo;  for (j = 0; j < mo->N; j++){    printf("state %i in states %i\n", j, mo->s[j].in_states);    for (k=0; k<mo->s[j].in_states; k++)      printf("FIXME: log_in_a has three dimensions!"/*From: %i %f\n", mo->s[j].in_id[k], pv->log_in_a[j][k]*/);  }  printf("Log b:\n");  for (j = 0; j < mo->N; j++){    printf("state %i #chars: %i\n", j, ghmm_dpmodel_emission_table_size(mo, j));    for (k=0; k<ghmm_dpmodel_emission_table_size(mo, j); k++)      printf("Emission prob char: %i %f\n", k, pv->log_b[j][k]);  } }/*============================================================================*/static void pviterbi_precompute( ghmm_dpmodel *mo, plocal_store_t *v) {#define CUR_PROC "pviterbi_precompute"  int i, j, emission, t_class;    /* Precomputing the log(a_ij) */    for (j = 0; j < mo->N; j++)    for (i = 0; i < mo->s[j].in_states; i++)       for (t_class = 0; t_class < mo->s[mo->s[j].in_id[i]].kclasses; t_class++){	if ( mo->s[j].in_a[i][t_class] == 0.0 )   /* DBL_EPSILON ? */	  v->log_in_a[j][i][t_class] = +1; /*Not used any further in the calculations */	else	  v->log_in_a[j][i][t_class] = log( mo->s[j].in_a[i][t_class] );      }	  /* Precomputing the log emission probabilities for each state*/  for (j = 0; j < mo->N; j++) {    for (emission = 0; emission < ghmm_dpmodel_emission_table_size(mo,j); emission++) {      if (1) {	if ( mo->s[j].b[emission] == 0.0 )   /* DBL_EPSILON ? */ 	  v->log_b[j][emission] = +1; 	else	  v->log_b[j][emission] = log( mo->s[j].b[emission] );      }      else{	v->log_b[j][emission] = 0.0;       }    }    v->log_b[j][emission] = 1; /* the last field is for invalid emissions */  }#undef CUR_PROC}/* viterbi_precompute *//*============================================================================*//** *//* static void p__viterbi_silent( ghmm_dmodel *mo, int t, plocal_store_t *v ) *//* { *//*   int topocount; *//*   int i, k; *//*   double max_value, value; *//*   for ( topocount = 0; topocount < mo->topo_order_length; topocount++) { *//*       k = mo->topo_order[topocount]; *//*       if ( mo->silent[k] ) { /\* Silent states *\/	 *//* 	  /\* Determine the maximum *\/ *//* 	  /\* max_phi = phi[i] + log_in_a[j][i] ... *\/ *//* 	  max_value = -DBL_MAX; *//* 	  v->psi[t][k] = -1; *//* 	  for (i = 0; i < mo->s[k].in_states; i++) { */	    /* 	  if ( v->phi[ mo->s[k].in_id[i] ] != +1 && *//* 		 v->log_in_a[k][i]      != +1) { *//* 	      value = v->phi[ mo->s[k].in_id[i] ] + v->log_in_a[k][i]; *//* 	      if (value > max_value) { *//* 		max_value = value; *//* 		v->psi[t][k] = mo->s[k].in_id[i]; *//* 	      } *//* 	    } *//* 	  } */	  /* 	  /\* No maximum found (that is, state never reached) *//* 	     or the output O[t] = 0.0: *\/ *//* 	  if (max_value    == -DBL_MAX) { *//* 	    v->phi[k] = +1; *//* 	  } else { *//* 	    v->phi[k] = max_value; *//* 	  } */	  /* 	} *//*   } *//* } *//*============================================================================*/static double sget_log_in_a(plocal_store_t * pv, int i, int j, ghmm_dpseq * X, ghmm_dpseq * Y, int index_x, int index_y) {  /* determine the transition class for the source ghmm_dstate */  int id = pv->mo->s[i].in_id[j];  int cl = pv->mo->s[id].class_change->get_class(pv->mo, X, Y, index_x,index_y,				   pv->mo->s[id].class_change->user_data);  return pv->log_in_a[i][j][cl];}/*============================================================================*/static double log_b(plocal_store_t * pv, int state, int emission) {#ifdef DEBUG  if (ghmm_dstate > pv->mo->N)     fprintf(stderr, "log_b: State index out of bounds %i > %i\n", ghmm_dstate, pv->mo->N);  if (emission > ghmm_dpmodel_emission_table_size(pv->mo, state))    fprintf(stderr, "log_b: Emission index out of bounds %i > %i for ghmm_dstate %i\n",	    emission, ghmm_dpmodel_emission_table_size(pv->mo, state), state); 

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?