model.c

来自「General Hidden Markov Model Library 一个通用」· C语言 代码 · 共 2,232 行 · 第 1/5 页

C
2,232
字号
/*********************************************************************************       This file is part of the General Hidden Markov Model Library,*       GHMM version 0.8_beta1, see http://ghmm.org**       Filename: ghmm/ghmm/model.c*       Authors:  Benhard Knab, Bernd Wichern, Benjamin Georgi, Alexander Schliep**       Copyright (C) 1998-2004 Alexander Schliep*       Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln*       Copyright (C) 2002-2004 Max-Planck-Institut fuer Molekulare Genetik,*                               Berlin**       Contact: schliep@ghmm.org**       This library is free software; you can redistribute it and/or*       modify it under the terms of the GNU Library General Public*       License as published by the Free Software Foundation; either*       version 2 of the License, or (at your option) any later version.**       This library is distributed in the hope that it will be useful,*       but WITHOUT ANY WARRANTY; without even the implied warranty of*       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU*       Library General Public License for more details.**       You should have received a copy of the GNU Library General Public*       License along with this library; if not, write to the Free*       Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA***       This file is version $Revision: 1898 $*                       from $Date: 2007-09-25 12:34:37 +0200 (Tue, 25 Sep 2007) $*             last change by $Author: grunau $.********************************************************************************/#ifdef WIN32#  include "win_config.h"#endif#ifdef HAVE_CONFIG_H#  include "../config.h"#endif#include <float.h>#include <math.h>#include <assert.h>#include "model.h"#include "matrix.h"#include "sequence.h"#include "rng.h"#include "foba.h"#include "mes.h"#include "mprintf.h"#include "string.h"#include "vector.h"#include "ghmm_internals.h"#include "xmlreader.h"#include "xmlwriter.h"#include "obsolete.h"#define  __EPS 10e-6/* Using floating point exceptions */#ifdef DEBUG_FPE#include <fenv.h>static void __attribute__ ((constructor)) trapfpe (void){  /* Enable some exceptions. At startup all exceptions are masked. */  feenableexcept (FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW);}#endiftypedef enum DFSFLAG { DONE, NOTVISITED, VISITED } DFSFLAG;/*----------------------------------------------------------------------------*/int ghmm_ipow(const ghmm_dmodel * mo, int x, unsigned int n) {#define CUR_PROC "ghmm_ipow"  int result=1;    if ((mo->M == x) && (n <= mo->maxorder + 1)) {    if (mo->pow_lookup)      result = mo->pow_lookup[n];  } else {    while (n != 0) {      if (n & 1)        result *= x;      x *= x;      n >>= 1;    }  }  return result;#undef CUR_PROC}/*----------------------------------------------------------------------------*/static int model_state_alloc (ghmm_dstate * s, int M, int in_states,                              int out_states){# define CUR_PROC "model_state_alloc"  int res = -1;  ARRAY_CALLOC (s->b, M);  if (out_states > 0) {    ARRAY_CALLOC (s->out_id, out_states);    ARRAY_CALLOC (s->out_a, out_states);  }  if (in_states > 0) {    ARRAY_CALLOC (s->in_id, in_states);    ARRAY_CALLOC (s->in_a, in_states);  }  res = 0;STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  return (res);# undef CUR_PROC}                               /* model_state_alloc *//*----------------------------------------------------------------------------*/ghmm_dmodel * ghmm_dmodel_calloc(int M, int N, int modeltype, int * inDegVec,				int * outDegVec) {#define CUR_PROC "ghmm_dmodel_calloc"  int i;  ghmm_dmodel * mo;  assert(modeltype & GHMM_kDiscreteHMM);  ARRAY_CALLOC(mo, 1);  mo->M = M;  mo->N = N;  mo->model_type = modeltype;  ARRAY_CALLOC(mo->s, N);  for (i=0; i<N; i++) {    if (model_state_alloc(&(mo->s[i]), M, inDegVec[i], outDegVec[i]))      goto STOP;  }  if (mo->model_type & GHMM_kSilentStates)    ARRAY_CALLOC(mo->silent, N);  if (mo->model_type & GHMM_kTiedEmissions) {    ARRAY_CALLOC(mo->tied_to, N);    for (i=0; i<N; ++i)      mo->tied_to[i] = GHMM_kUntied;  }     if (mo->model_type & GHMM_kBackgroundDistributions) {    ARRAY_MALLOC(mo->background_id, N);    for (i=0; i<N; ++i)      mo->background_id[i] = GHMM_kNoBackgroundDistribution;  }  if (mo->model_type & GHMM_kHigherOrderEmissions)    ARRAY_CALLOC(mo->order, N);     if (mo->model_type & GHMM_kLabeledStates)    ARRAY_CALLOC(mo->label, N);  return mo;STOP:  ghmm_dmodel_free(&mo);  return NULL;#undef CUR_PROC}/*----------------------------------------------------------------------------*/#ifdef GHMM_OBSOLETEstatic int model_copy_vectors (ghmm_dmodel * mo, int index, double **a_matrix,                               double **b_matrix, double *pi, int *fix){#define CUR_PROC "model_copy_vectors"  int i, cnt_out = 0, cnt_in = 0;  mo->s[index].pi = pi[index];  mo->s[index].fix = fix[index];  for (i = 0; i < mo->M; i++)    mo->s[index].b[i] = b_matrix[index][i];  for (i = 0; i < mo->N; i++) {    if (a_matrix[index][i]) {   /* Transitions to a following state possible */      if (cnt_out >= mo->s[index].out_states) {        GHMM_LOG_QUEUED(LCONVERTED);        return (-1);      }      mo->s[index].out_id[cnt_out] = i;      mo->s[index].out_a[cnt_out] = a_matrix[index][i];      cnt_out++;    }    if (a_matrix[i][index]) {   /* Transitions to a previous state possible */      if (cnt_in >= mo->s[index].in_states) {        GHMM_LOG_QUEUED(LCONVERTED);        return (-1);      }      mo->s[index].in_id[cnt_in] = i;      mo->s[index].in_a[cnt_in] = a_matrix[i][index];      cnt_in++;    }  }  return (0);#undef CUR_PROC}                               /* model_copy_vectors *//*============================================================================*//* Old prototype:ghmm_dmodel **ghmm_dmodel_read(char *filename, int *mo_number, int **seq,			 const int *seq_len, int seq_number) { */ghmm_dmodel **ghmm_dmodel_read (char *filename, int *mo_number){#define CUR_PROC "ghmm_dmodel_read"  int j;  long new_models = 0;  scanner_t *s = NULL;  ghmm_dmodel **mo = NULL;  *mo_number = 0;  s = ighmm_scanner_alloc (filename);  if (!s) {    GHMM_LOG_QUEUED(LCONVERTED);    goto STOP;  }  while (!s->err && !s->eof) {    ighmm_scanner_get_name (s);    ighmm_scanner_consume (s, '=');    if (s->err)      goto STOP;    if (!strcmp (s->id, "HMM") || !strcmp (s->id, "hmm")) {      (*mo_number)++;      /* more mem */      ARRAY_REALLOC (mo, *mo_number);      mo[*mo_number - 1] = ghmm_dmodel_direct_read (s, (int *) &new_models);      if (!mo[*mo_number - 1]) {        GHMM_LOG_QUEUED(LCONVERTED);        goto STOP;      }      /* Copies the model, that has already been read. */      if (new_models > 1) {        /* "-1" because mo_number++ has already been done. */        ARRAY_REALLOC (mo, *mo_number - 1 + new_models);        for (j = 1; j < new_models; j++) {          mo[*mo_number] = ghmm_dmodel_copy (mo[*mo_number - 1]);          if (!mo[*mo_number]) {            GHMM_LOG_QUEUED(LCONVERTED);            goto STOP;          }          (*mo_number)++;        }      }    }    else if (!strcmp (s->id, "HMM_SEQ")) {      ghmm_dmodel **tmp_mo = NULL;      tmp_mo = ghmm_dmodel_from_sequence_ascii (s, &new_models);      ARRAY_REALLOC (mo, (*mo_number + new_models));      for (j = 0; j < new_models; j++) {        if (!tmp_mo[j]) {          GHMM_LOG_QUEUED(LCONVERTED);          goto STOP;        }        mo[*mo_number] = tmp_mo[j];        (*mo_number)++;      }    }    else {      ighmm_scanner_error (s, "unknown identifier");      goto STOP;    }    ighmm_scanner_consume (s, ';');    if (s->err)      goto STOP;  }                             /* while(!s->err && !s->eof) */    ighmm_scanner_free(&s);  return mo;STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  ighmm_scanner_free(&s);  return NULL;#undef CUR_PROC}                               /* ghmm_dmodel_read *//*============================================================================*/ghmm_dmodel *ghmm_dmodel_direct_read (scanner_t * s, int *multip){#define CUR_PROC "ghmm_dmodel_direct_read"  int i, m_read, n_read, a_read, b_read, pi_read, len, prior_read, fix_read;  int mt_read=0;  ghmm_dmodel *mo = NULL;  double **a_matrix = NULL, **b_matrix = NULL;  double *pi_vector = NULL;  int *fix_vector = NULL;  m_read = n_read = a_read = b_read = pi_read = prior_read = fix_read = 0;  *multip = 1;                  /* default */  ARRAY_CALLOC (mo, 1);  ighmm_scanner_consume (s, '{');  if (s->err)    goto STOP;  while (!s->err && !s->eof && s->c - '}') {    ighmm_scanner_get_name(s);    if (strcmp(s->id, "M") && strcmp(s->id, "N") && strcmp(s->id, "Pi")        && strcmp(s->id, "A") && strcmp(s->id, "B") && strcmp(s->id, "multip")	&& strcmp(s->id, "prior") && strcmp(s->id, "fix_state") && strcmp(s->id, "ModelType")) {      ighmm_scanner_error(s, "unknown identifier");      goto STOP;    }    ighmm_scanner_consume (s, '=');    if (s->err)      goto STOP;    if (!strcmp (s->id, "multip")) {      *multip = ighmm_scanner_get_int (s);      if (*multip < 1) {        /* Doesn't make any sense! */        *multip = 1;        GHMM_LOG(LCONVERTED, "Multip < 1 ignored\n");      }    }    else if (!strcmp (s->id, "M")) {    /*Number of output values */      if (m_read) {        ighmm_scanner_error (s, "identifier M twice");        goto STOP;      }      mo->M = ighmm_scanner_get_int (s);      m_read = 1;    }    else if (!strcmp (s->id, "N")) {    /*Number of states */      if (n_read) {        ighmm_scanner_error (s, "identifier N twice");        goto STOP;      }      mo->N = ighmm_scanner_get_int (s);      ARRAY_CALLOC (mo->s, mo->N);      n_read = 1;    }    else if (!strcmp (s->id, "A")) {    /*Transition probability */      if (!n_read) {        ighmm_scanner_error (s, "need N as a range for A");        goto STOP;      }      if (a_read) {        ighmm_scanner_error (s, "identifier A twice");        goto STOP;      }      ARRAY_CALLOC (a_matrix, mo->N);      ighmm_scanner_get_name (s);      if (!strcmp (s->id, "matrix")) {        if (ighmm_cmatrix_read (s, a_matrix, mo->N, mo->N)) {          ighmm_scanner_error (s, "unable to read matrix A");          goto STOP;        }        a_read = 1;      }      else {        ighmm_scanner_error (s, "unknown identifier");        goto STOP;      }    }    else if (!strcmp (s->id, "B")) {    /*Output probability */      if ((!n_read) || (!m_read)) {        ighmm_scanner_error (s, "need M and N as a range for B");        goto STOP;      }      if (b_read) {        ighmm_scanner_error (s, "identifier B twice");        goto STOP;      }      ARRAY_CALLOC (b_matrix, mo->N);      ighmm_scanner_get_name (s);      if (!strcmp (s->id, "matrix")) {        if (ighmm_cmatrix_read (s, b_matrix, mo->N, mo->M)) {          ighmm_scanner_error (s, "unable to read matrix B");          goto STOP;        }        b_read = 1;      }      else {        ighmm_scanner_error (s, "unknown identifier");        goto STOP;      }    }    else if (!strcmp (s->id, "prior")) {        /*A prior model */      if (prior_read) {        ighmm_scanner_error (s, "identifier prior twice");        goto STOP;      }      mo->prior = ighmm_scanner_get_edouble (s);      if ((mo->prior < 0 || mo->prior > 1) && mo->prior != -1) {        ighmm_scanner_error (s, "invalid model prior");        goto STOP;      }      prior_read = 1;    }    else if (!strcmp (s->id, "ModelType")) {    /* Model type*/      if (mt_read) {        ighmm_scanner_error(s, "identifier ModelType twice");        goto STOP;      }      mo->model_type = ighmm_scanner_get_int(s);      if (mo->model_type & (GHMM_kLabeledStates + GHMM_kHigherOrderEmissions)) {	ighmm_scanner_error(s, "unsupported Model Type");	goto STOP;      }      mt_read = 1;    }    else if (!strcmp (s->id, "Pi")) {   /*Initial state probabilty */      if (!n_read) {        ighmm_scanner_error (s, "need N as a range for Pi");        goto STOP;      }      if (pi_read) {        ighmm_scanner_error (s, "identifier Pi twice");        goto STOP;      }      ighmm_scanner_get_name (s);      if (!strcmp (s->id, "vector")) {        ighmm_scanner_consume (s, '{');        if (s->err)          goto STOP;        pi_vector = scanner_get_double_earray (s, &len);        if (len != mo->N) {          ighmm_scanner_error (s, "wrong number of elements in PI");          goto STOP;        }        ighmm_scanner_consume (s, ';');        if (s->err)          goto STOP;        ighmm_scanner_consume (s, '}');        if (s->err)          goto STOP;        pi_read = 1;      }      else {        ighmm_scanner_error (s, "unknown identifier");        goto STOP;      }    }    else if (!strcmp (s->id, "fix_state")) {      if (!n_read) {        ighmm_scanner_error (s, "need N as a range for fix_state");        goto STOP;      }

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?