⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sdmodel.c

📁 General Hidden Markov Model Library 一个通用的隐马尔科夫模型的C代码库
💻 C
📖 第 1 页 / 共 2 页
字号:
/*********************************************************************************       This file is part of the General Hidden Markov Model Library,*       GHMM version 0.8_beta1, see http://ghmm.org**       Filename: ghmm/ghmm/sdmodel.c*       Authors:  Wasinee Rungsarityotin, Utz Pape, Andrea Weisse, janne Grunau**       Copyright (C) 1998-2004 Alexander Schliep*       Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln*       Copyright (C) 2002-2004 Max-Planck-Institut fuer Molekulare Genetik,*                               Berlin**       Contact: schliep@ghmm.org**       This library is free software; you can redistribute it and/or*       modify it under the terms of the GNU Library General Public*       License as published by the Free Software Foundation; either*       version 2 of the License, or (at your option) any later version.**       This library is distributed in the hope that it will be useful,*       but WITHOUT ANY WARRANTY; without even the implied warranty of*       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU*       Library General Public License for more details.**       You should have received a copy of the GNU Library General Public*       License along with this library; if not, write to the Free*       Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA***       This file is version $Revision: 1713 $*                       from $Date: 2006-10-16 16:06:28 +0200 (Mon, 16 Oct 2006) $*             last change by $Author: grunau $.********************************************************************************/#ifdef WIN32#  include "win_config.h"#endif#ifdef HAVE_CONFIG_H#  include "../config.h"#endif#undef NDEBUG#include <assert.h>#include <float.h>#include <math.h>#include <stdlib.h>#include <string.h>#include "sdmodel.h"#include "model.h"#include "matrix.h"#include "sequence.h"#include "rng.h"#include "sdfoba.h"#include "mes.h"#include "mprintf.h"#include "ghmm_internals.h"#define  __EPS 10e-6/*----------------------------------------------------------------------------*/static int dsmodel_state_alloc (ghmm_dsstate * s, int M, int in_states,                                int out_states, int cos){#define CUR_PROC "sdmodel_state_alloc"  int res = -1;  ARRAY_CALLOC (s->b, M);  if (out_states > 0) {    ARRAY_CALLOC (s->out_id, out_states);    s->out_a = ighmm_cmatrix_alloc (cos, out_states);    if (!s->out_a) {      GHMM_LOG_QUEUED(LCONVERTED);      goto STOP;    }  }  if (in_states > 0) {    ARRAY_CALLOC (s->in_id, in_states);    s->in_a = ighmm_cmatrix_alloc (cos, in_states);    if (!s->in_a) {      GHMM_LOG_QUEUED(LCONVERTED);      goto STOP;    }  }  res = 0;STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  return (res);#undef CUR_PROC}                               /* model_state_alloc *//*----------------------------------------------------------------------------*/double ghmm_dsmodel_likelihood (ghmm_dsmodel * mo, ghmm_dseq * sq){# define CUR_PROC "ghmm_dsmodel_likelihood"  double log_p_i, log_p;  int found, i;  found = 0;  log_p = 0.0;  for (i = 0; i < sq->seq_number; i++) {    ghmm_dsmodel_logp (mo, sq->seq[i], sq->seq_len[i], &log_p_i);    if (log_p_i != +1) {      log_p += log_p_i;      found = 1;    }    /*    else {*/    /*  char *str =*/    /*   ighmm_mprintf(NULL, 0, "sequence[%d] can't be build.\n", i);*/    /*  GHMM_LOG(LCONVERTED, str);*/    /*}*/  }  if (!found)    log_p = +1.0;  return (log_p);#undef CUR_PROC}                               /*ghmm_dsmodel_likelihood */#ifdef sdmodelSTATIC/*----------------------------------------------------------------------------*/static int sdmodel_copy_vectors (ghmm_dsmodel * mo, int index, double ***a_matrix,                                 double **b_matrix, double *pi, int *fix){#define CUR_PROC "sdmodel_copy_vectors"  int i, c, cnt_out = 0, cnt_in = 0;  mo->s[index].pi = pi[index];  mo->s[index].fix = fix[index];  for (i = 0; i < mo->M; i++)    mo->s[index].b[i] = b_matrix[index][i];  for (c = 0; c < mo->cos; c++)    for (i = 0; i < mo->N; i++) {      if (a_matrix[c][index][i]) {      /* Transitions to a following state possible */        if (cnt_out >= mo->s[index].out_states) {          GHMM_LOG_QUEUED(LCONVERTED);          return (-1);        }        mo->s[index].out_id[cnt_out] = i;        mo->s[index].out_a[c][cnt_out] = a_matrix[c][index][i];        cnt_out++;      }      if (a_matrix[i][index]) { /* Transitions to a previous state possible */        if (cnt_in >= mo->s[index].in_states) {          GHMM_LOG_QUEUED(LCONVERTED);          return (-1);        }        mo->s[index].in_id[cnt_in] = i;        mo->s[index].in_a[c][cnt_in] = a_matrix[c][i][index];        cnt_in++;      }    }  return (0);#undef CUR_PROC}                               /* model_alloc_vectors */#endif/*============================================================================*/ghmm_dsmodel * ghmm_dsmodel_calloc(int M, int N, int modeltype, int cos,				   int * inDegVec, int * outDegVec) {#define CUR_PROC "ghmm_dsmodel_calloc"  int i;  ghmm_dsmodel * mo;  assert((modeltype & GHMM_kDiscreteHMM) && (modeltype & GHMM_kTransitionClasses));  ARRAY_CALLOC(mo, 1);  mo->M = M;  mo->N = N;  mo->model_type = modeltype;  ARRAY_CALLOC(mo->s, N);  for (i=0; i<N; i++) {    if (dsmodel_state_alloc(&(mo->s[i]), M, inDegVec[i], outDegVec[i], cos))      goto STOP;  }  if (mo->model_type & GHMM_kSilentStates)    ARRAY_CALLOC(mo->silent, N);  if (mo->model_type & GHMM_kTiedEmissions) {    ARRAY_CALLOC(mo->tied_to, N);    for (i=0; i<N; ++i)      mo->tied_to[i] = GHMM_kUntied;  }     if (mo->model_type & GHMM_kBackgroundDistributions) {    ARRAY_MALLOC(mo->background_id, N);    for (i=0; i<N; ++i)      mo->background_id[i] = GHMM_kNoBackgroundDistribution;  }  if (mo->model_type & GHMM_kHigherOrderEmissions)    ARRAY_CALLOC(mo->order, N);     if (mo->model_type & GHMM_kLabeledStates)    ARRAY_CALLOC(mo->label, N);  return mo;STOP:  ghmm_dsmodel_free(&mo);  return NULL;#undef CUR_PROC}/*============================================================================*/int ghmm_dsmodel_free(ghmm_dsmodel ** mo) {#define CUR_PROC "ghmm_dsmodel_free"  ghmm_dsstate *my_state;  int i;  mes_check_ptr (mo, return (-1));  if (!*mo)    return (0);  for (i = 0; i < (*mo)->N; i++) {    my_state = &((*mo)->s[i]);    if (my_state->b)      m_free (my_state->b);    if (my_state->out_id != NULL)      m_free (my_state->out_id);    if (my_state->in_id != NULL)      m_free (my_state->in_id);    if (my_state->out_a)      ighmm_cmatrix_free (&((*mo)->s[i].out_a), (*mo)->cos);    if (my_state->in_a)      ighmm_cmatrix_free (&((*mo)->s[i].in_a), (*mo)->cos);    my_state->pi = 0;    my_state->b = NULL;    my_state->out_id = NULL;    my_state->in_id = NULL;    my_state->out_a = NULL;    my_state->in_a = NULL;    my_state->out_states = 0;    my_state->in_states = 0;    my_state->fix = 0;  }  if ((*mo)->model_type & GHMM_kLabeledStates)    m_free((*mo)->label);  if ((*mo)->model_type & GHMM_kSilentStates) {    m_free((*mo)->silent);    if ((*mo)->topo_order)      m_free((*mo)->topo_order);  }  m_free((*mo)->s);  m_free(*mo);  /*fprintf (stderr, "Free sdmodel\n");*/  return (0);#undef CUR_PROC}                               /* ghmm_dmodel_free *//*============================================================================*/ghmm_dsmodel * ghmm_dsmodel_copy(const ghmm_dsmodel * mo) {# define CUR_PROC "ghmm_dsmodel_copy"  int i, j, k, nachf, vorg, m;  ghmm_dsmodel *m2 = NULL;  ARRAY_CALLOC (m2, 1);  ARRAY_CALLOC (m2->s, mo->N);  for (i = 0; i < mo->N; i++) {    nachf = mo->s[i].out_states;    vorg = mo->s[i].in_states;    ARRAY_CALLOC (m2->s[i].out_id, nachf);    m2->s[i].out_a = ighmm_cmatrix_alloc (mo->cos, nachf);    ARRAY_CALLOC (m2->s[i].in_id, vorg);    m2->s[i].in_a = ighmm_cmatrix_alloc (mo->cos, vorg);    ARRAY_CALLOC (m2->s[i].b, mo->M);    /* Copy the values */    for (j = 0; j < nachf; j++) {      for (k = 0; k < mo->cos; k++) {        m2->s[i].out_a[k][j] = mo->s[i].out_a[k][j];      }      m2->s[i].out_id[j] = mo->s[i].out_id[j];    }    for (j = 0; j < vorg; j++) {      for (k = 0; k < mo->cos; k++) {        m2->s[i].in_a[k][j] = mo->s[i].in_a[k][j];      }      m2->s[i].in_id[j] = mo->s[i].in_id[j];    }    for (m = 0; m < mo->M; m++) {      m2->s[i].b[m] = mo->s[i].b[m];    }    m2->s[i].pi = mo->s[i].pi;    m2->s[i].out_states = nachf;    m2->s[i].in_states = vorg;    m2->s[i].countme = mo->s[i].countme;  }  m2->N = mo->N;  m2->M = mo->M;  m2->prior = mo->prior;  m2->cos = mo->cos;  m2->model_type = mo->model_type;  if (mo->model_type & GHMM_kSilentStates) {    assert (mo->silent != NULL);    ARRAY_CALLOC (m2->silent, mo->N);    for (i = 0; i < mo->N; i++) {      m2->silent[i] = mo->silent[i];    }    if (mo->topo_order_length > 0) {      ARRAY_CALLOC (m2->topo_order, mo->topo_order_length);      for (i = 0; i < mo->topo_order_length; i++) {        m2->topo_order[i] = mo->topo_order[i];      }    }  }  if (mo->model_type & GHMM_kLabeledStates) {    ARRAY_MALLOC(m2->label, m2->N);    for (i=0; i<mo->N; i++)      m2->label[i] = mo->label[i];  }  if (mo->get_class) {    m2->get_class = mo->get_class;  }  return (m2);STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  ghmm_dsmodel_free (&m2);  return (NULL);# undef CUR_PROC}                               /* ghmm_dmodel_copy *//*----------------------------------------------------------------------------*/void ghmm_dsmodel_topo_order (ghmm_dsmodel * mo){#define CUR_PROC "ghmm_dsmodel_topo_order"  fprintf (stderr, "ghmm_dsmodel_topo_order will be implemented using DFS.\n");#undef CUR_PROC}#ifdef sdmodelSTATIC/*============================================================================*/static ghmm_dseq *__sdmodel_generate_sequences (ghmm_dsmodel * mo, int seed,                                                 int global_len,                                                 long seq_number, int Tmax){# define CUR_PROC "ghmm_dsmodel_generate_sequences"  /* An end state is characterized by not having an output probabiliy. */  unsigned long tm;             /* Time seed */  ghmm_dseq *sq = NULL;  int state, n, i, j, m, reject_os, reject_tmax, badseq, class;  double p, sum, osum = 0.0;  int len = global_len, up = 0, stillbadseq = 0, reject_os_tmp = 0;  double dummy = 0.0;  sq = ghmm_dseq_calloc (seq_number);  if (!sq) {    GHMM_LOG_QUEUED(LCONVERTED);    goto STOP;  }  if (len <= 0)    /* A specific length of the sequences isn't given. As a model should have       an end state, the constant MAX_SEQ_LEN is used. */    len = (int) MAX_SEQ_LEN;  if (seed > 0) {    GHMM_RNG_SET (RNG, seed);  }  else {    tm = rand ();    GHMM_RNG_SET (RNG, tm);    fprintf (stderr, "# using rng '%s' seed=%ld\n", GHMM_RNG_NAME (RNG), tm);  }  n = 0;  reject_os = reject_tmax = 0;  while (n < seq_number) {    /* Test: A new seed for each sequence */    /*   ghmm_rng_timeseed(RNG); */    stillbadseq = badseq = 0;    ARRAY_CALLOC (sq->seq[n], len);    /* Get a random initial state i */    p = GHMM_RNG_UNIFORM (RNG);    sum = 0.0;    for (i = 0; i < mo->N; i++) {      sum += mo->s[i].pi;      if (sum >= p)        break;    }    /* Get a random initial output m */    p = GHMM_RNG_UNIFORM (RNG);    sum = 0.0;    for (m = 0; m < mo->M; m++) {      sum += mo->s[i].b[m];      if (sum >= p)        break;    }    sq->seq[n][0] = m;    state = 1;    /* The first symbol chooses the start class */    class = mo->get_class (sq->seq[n], state);    /*class = ghmm_cseq_class(&dummy, 0, &osum); */ /*  dummy function */    while (state < len) {      /* Get a new state */      p = GHMM_RNG_UNIFORM (RNG);      sum = 0.0;      for (j = 0; j < mo->s[i].out_states; j++) {        sum += mo->s[i].out_a[class][j];        if (sum >= p)          break;      }      if (sum == 0.0) {        if (mo->s[i].out_states > 0) {          /* Repudiate the sequence, if all smo->s[i].out_a[class][.] == 0,             that is, class "class" isn't used in the original data:             go out of the while-loop, n should not be counted. */          /* printf("Zustand %d, class %d, len %d out_states %d \n", i, class,             state, smo->s[i].out_states); */          badseq = 1;          /* break; */          /* Try: If the class is "empty", try the neighbour class;             first, sweep down to zero; if still no success, sweep up to             COS - 1. If still no success --> Repudiate the sequence. */          if (class > 0 && up == 0) {            class--;            continue;          }          else if (class < mo->cos - 1) {            class++;            up = 1;            continue;          }          else {            stillbadseq = 1;            break;          }        }        else          /* An end state is reached, get out of the while-loop */          break;      }      i = mo->s[i].out_id[j];      /* Get a random output m from state i */      p = GHMM_RNG_UNIFORM (RNG);      sum = 0.0;      for (m = 0; m < mo->M; m++) {        sum += mo->s[i].b[m];        if (sum >= p)          break;      }      sq->seq[n][state] = m;      /* Decide the class for the next step */      class = mo->get_class (sq->seq[n], state);      /*class = ghmm_cseq_class(&dummy, state, &osum); */ /* dummy */      up = 0;      state++;    }                           /* while (state < len) , global_len depends on the data */    if (badseq) {      reject_os_tmp++;    }    if (stillbadseq) {      reject_os++;      m_free (sq->seq[n]);      /*      printf("cl %d, s %d, %d\n", class, i, n); */    }    else if (state > Tmax) {      reject_tmax++;      m_free (sq->seq[n]);    }    else {      if (state < len)        ARRAY_REALLOC (sq->seq[n], state);      sq->seq_len[n] = state;      /* ighmm_cvector_print(stdout, sq->seq[n], sq->seq_len[n]," "," ",""); */      n++;    }    /*    printf("reject_os %d, reject_tmax %d\n", reject_os, reject_tmax); */    if (reject_os > 10000) {      GHMM_LOG(LCONVERTED, "Reached max. no. of rejections\n");      break;    }    if (!(n % 1000))      printf ("%d Seqs. generated\n", n);  }                             /* n-loop */  if (reject_os > 0)    printf ("%d sequences rejected (os)!\n", reject_os);  if (reject_os_tmp > 0)    printf ("%d sequences changed class\n", reject_os_tmp - reject_os);  if (reject_tmax > 0)    printf ("%d sequences rejected (Tmax, %d)!\n", reject_tmax, Tmax);  return (sq);STOP:     /* Label STOP from ARRAY_[CM]ALLOC */  ghmm_dseq_free (&sq);  return (NULL);# undef CUR_PROC}                               /* data */#endifint ghmm_dsmodel_init_silent_states (ghmm_dsmodel * mo){#define CUR_PROC "ghmm_dsmodel_init_silent_states"  int nSilentStates = 0;  int i, m;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -