📄 sdmodel.c
字号:
/********************************************************************************* This file is part of the General Hidden Markov Model Library,* GHMM version 0.8_beta1, see http://ghmm.org** Filename: ghmm/ghmm/sdmodel.c* Authors: Wasinee Rungsarityotin, Utz Pape, Andrea Weisse, janne Grunau** Copyright (C) 1998-2004 Alexander Schliep* Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln* Copyright (C) 2002-2004 Max-Planck-Institut fuer Molekulare Genetik,* Berlin** Contact: schliep@ghmm.org** This library is free software; you can redistribute it and/or* modify it under the terms of the GNU Library General Public* License as published by the Free Software Foundation; either* version 2 of the License, or (at your option) any later version.** This library is distributed in the hope that it will be useful,* but WITHOUT ANY WARRANTY; without even the implied warranty of* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU* Library General Public License for more details.** You should have received a copy of the GNU Library General Public* License along with this library; if not, write to the Free* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*** This file is version $Revision: 1713 $* from $Date: 2006-10-16 16:06:28 +0200 (Mon, 16 Oct 2006) $* last change by $Author: grunau $.********************************************************************************/#ifdef WIN32# include "win_config.h"#endif#ifdef HAVE_CONFIG_H# include "../config.h"#endif#undef NDEBUG#include <assert.h>#include <float.h>#include <math.h>#include <stdlib.h>#include <string.h>#include "sdmodel.h"#include "model.h"#include "matrix.h"#include "sequence.h"#include "rng.h"#include "sdfoba.h"#include "mes.h"#include "mprintf.h"#include "ghmm_internals.h"#define __EPS 10e-6/*----------------------------------------------------------------------------*/static int dsmodel_state_alloc (ghmm_dsstate * s, int M, int in_states, int out_states, int cos){#define CUR_PROC "sdmodel_state_alloc" int res = -1; ARRAY_CALLOC (s->b, M); if (out_states > 0) { ARRAY_CALLOC (s->out_id, out_states); s->out_a = ighmm_cmatrix_alloc (cos, out_states); if (!s->out_a) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } } if (in_states > 0) { ARRAY_CALLOC (s->in_id, in_states); s->in_a = ighmm_cmatrix_alloc (cos, in_states); if (!s->in_a) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } } res = 0;STOP: /* Label STOP from ARRAY_[CM]ALLOC */ return (res);#undef CUR_PROC} /* model_state_alloc *//*----------------------------------------------------------------------------*/double ghmm_dsmodel_likelihood (ghmm_dsmodel * mo, ghmm_dseq * sq){# define CUR_PROC "ghmm_dsmodel_likelihood" double log_p_i, log_p; int found, i; found = 0; log_p = 0.0; for (i = 0; i < sq->seq_number; i++) { ghmm_dsmodel_logp (mo, sq->seq[i], sq->seq_len[i], &log_p_i); if (log_p_i != +1) { log_p += log_p_i; found = 1; } /* else {*/ /* char *str =*/ /* ighmm_mprintf(NULL, 0, "sequence[%d] can't be build.\n", i);*/ /* GHMM_LOG(LCONVERTED, str);*/ /*}*/ } if (!found) log_p = +1.0; return (log_p);#undef CUR_PROC} /*ghmm_dsmodel_likelihood */#ifdef sdmodelSTATIC/*----------------------------------------------------------------------------*/static int sdmodel_copy_vectors (ghmm_dsmodel * mo, int index, double ***a_matrix, double **b_matrix, double *pi, int *fix){#define CUR_PROC "sdmodel_copy_vectors" int i, c, cnt_out = 0, cnt_in = 0; mo->s[index].pi = pi[index]; mo->s[index].fix = fix[index]; for (i = 0; i < mo->M; i++) mo->s[index].b[i] = b_matrix[index][i]; for (c = 0; c < mo->cos; c++) for (i = 0; i < mo->N; i++) { if (a_matrix[c][index][i]) { /* Transitions to a following state possible */ if (cnt_out >= mo->s[index].out_states) { GHMM_LOG_QUEUED(LCONVERTED); return (-1); } mo->s[index].out_id[cnt_out] = i; mo->s[index].out_a[c][cnt_out] = a_matrix[c][index][i]; cnt_out++; } if (a_matrix[i][index]) { /* Transitions to a previous state possible */ if (cnt_in >= mo->s[index].in_states) { GHMM_LOG_QUEUED(LCONVERTED); return (-1); } mo->s[index].in_id[cnt_in] = i; mo->s[index].in_a[c][cnt_in] = a_matrix[c][i][index]; cnt_in++; } } return (0);#undef CUR_PROC} /* model_alloc_vectors */#endif/*============================================================================*/ghmm_dsmodel * ghmm_dsmodel_calloc(int M, int N, int modeltype, int cos, int * inDegVec, int * outDegVec) {#define CUR_PROC "ghmm_dsmodel_calloc" int i; ghmm_dsmodel * mo; assert((modeltype & GHMM_kDiscreteHMM) && (modeltype & GHMM_kTransitionClasses)); ARRAY_CALLOC(mo, 1); mo->M = M; mo->N = N; mo->model_type = modeltype; ARRAY_CALLOC(mo->s, N); for (i=0; i<N; i++) { if (dsmodel_state_alloc(&(mo->s[i]), M, inDegVec[i], outDegVec[i], cos)) goto STOP; } if (mo->model_type & GHMM_kSilentStates) ARRAY_CALLOC(mo->silent, N); if (mo->model_type & GHMM_kTiedEmissions) { ARRAY_CALLOC(mo->tied_to, N); for (i=0; i<N; ++i) mo->tied_to[i] = GHMM_kUntied; } if (mo->model_type & GHMM_kBackgroundDistributions) { ARRAY_MALLOC(mo->background_id, N); for (i=0; i<N; ++i) mo->background_id[i] = GHMM_kNoBackgroundDistribution; } if (mo->model_type & GHMM_kHigherOrderEmissions) ARRAY_CALLOC(mo->order, N); if (mo->model_type & GHMM_kLabeledStates) ARRAY_CALLOC(mo->label, N); return mo;STOP: ghmm_dsmodel_free(&mo); return NULL;#undef CUR_PROC}/*============================================================================*/int ghmm_dsmodel_free(ghmm_dsmodel ** mo) {#define CUR_PROC "ghmm_dsmodel_free" ghmm_dsstate *my_state; int i; mes_check_ptr (mo, return (-1)); if (!*mo) return (0); for (i = 0; i < (*mo)->N; i++) { my_state = &((*mo)->s[i]); if (my_state->b) m_free (my_state->b); if (my_state->out_id != NULL) m_free (my_state->out_id); if (my_state->in_id != NULL) m_free (my_state->in_id); if (my_state->out_a) ighmm_cmatrix_free (&((*mo)->s[i].out_a), (*mo)->cos); if (my_state->in_a) ighmm_cmatrix_free (&((*mo)->s[i].in_a), (*mo)->cos); my_state->pi = 0; my_state->b = NULL; my_state->out_id = NULL; my_state->in_id = NULL; my_state->out_a = NULL; my_state->in_a = NULL; my_state->out_states = 0; my_state->in_states = 0; my_state->fix = 0; } if ((*mo)->model_type & GHMM_kLabeledStates) m_free((*mo)->label); if ((*mo)->model_type & GHMM_kSilentStates) { m_free((*mo)->silent); if ((*mo)->topo_order) m_free((*mo)->topo_order); } m_free((*mo)->s); m_free(*mo); /*fprintf (stderr, "Free sdmodel\n");*/ return (0);#undef CUR_PROC} /* ghmm_dmodel_free *//*============================================================================*/ghmm_dsmodel * ghmm_dsmodel_copy(const ghmm_dsmodel * mo) {# define CUR_PROC "ghmm_dsmodel_copy" int i, j, k, nachf, vorg, m; ghmm_dsmodel *m2 = NULL; ARRAY_CALLOC (m2, 1); ARRAY_CALLOC (m2->s, mo->N); for (i = 0; i < mo->N; i++) { nachf = mo->s[i].out_states; vorg = mo->s[i].in_states; ARRAY_CALLOC (m2->s[i].out_id, nachf); m2->s[i].out_a = ighmm_cmatrix_alloc (mo->cos, nachf); ARRAY_CALLOC (m2->s[i].in_id, vorg); m2->s[i].in_a = ighmm_cmatrix_alloc (mo->cos, vorg); ARRAY_CALLOC (m2->s[i].b, mo->M); /* Copy the values */ for (j = 0; j < nachf; j++) { for (k = 0; k < mo->cos; k++) { m2->s[i].out_a[k][j] = mo->s[i].out_a[k][j]; } m2->s[i].out_id[j] = mo->s[i].out_id[j]; } for (j = 0; j < vorg; j++) { for (k = 0; k < mo->cos; k++) { m2->s[i].in_a[k][j] = mo->s[i].in_a[k][j]; } m2->s[i].in_id[j] = mo->s[i].in_id[j]; } for (m = 0; m < mo->M; m++) { m2->s[i].b[m] = mo->s[i].b[m]; } m2->s[i].pi = mo->s[i].pi; m2->s[i].out_states = nachf; m2->s[i].in_states = vorg; m2->s[i].countme = mo->s[i].countme; } m2->N = mo->N; m2->M = mo->M; m2->prior = mo->prior; m2->cos = mo->cos; m2->model_type = mo->model_type; if (mo->model_type & GHMM_kSilentStates) { assert (mo->silent != NULL); ARRAY_CALLOC (m2->silent, mo->N); for (i = 0; i < mo->N; i++) { m2->silent[i] = mo->silent[i]; } if (mo->topo_order_length > 0) { ARRAY_CALLOC (m2->topo_order, mo->topo_order_length); for (i = 0; i < mo->topo_order_length; i++) { m2->topo_order[i] = mo->topo_order[i]; } } } if (mo->model_type & GHMM_kLabeledStates) { ARRAY_MALLOC(m2->label, m2->N); for (i=0; i<mo->N; i++) m2->label[i] = mo->label[i]; } if (mo->get_class) { m2->get_class = mo->get_class; } return (m2);STOP: /* Label STOP from ARRAY_[CM]ALLOC */ ghmm_dsmodel_free (&m2); return (NULL);# undef CUR_PROC} /* ghmm_dmodel_copy *//*----------------------------------------------------------------------------*/void ghmm_dsmodel_topo_order (ghmm_dsmodel * mo){#define CUR_PROC "ghmm_dsmodel_topo_order" fprintf (stderr, "ghmm_dsmodel_topo_order will be implemented using DFS.\n");#undef CUR_PROC}#ifdef sdmodelSTATIC/*============================================================================*/static ghmm_dseq *__sdmodel_generate_sequences (ghmm_dsmodel * mo, int seed, int global_len, long seq_number, int Tmax){# define CUR_PROC "ghmm_dsmodel_generate_sequences" /* An end state is characterized by not having an output probabiliy. */ unsigned long tm; /* Time seed */ ghmm_dseq *sq = NULL; int state, n, i, j, m, reject_os, reject_tmax, badseq, class; double p, sum, osum = 0.0; int len = global_len, up = 0, stillbadseq = 0, reject_os_tmp = 0; double dummy = 0.0; sq = ghmm_dseq_calloc (seq_number); if (!sq) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } if (len <= 0) /* A specific length of the sequences isn't given. As a model should have an end state, the constant MAX_SEQ_LEN is used. */ len = (int) MAX_SEQ_LEN; if (seed > 0) { GHMM_RNG_SET (RNG, seed); } else { tm = rand (); GHMM_RNG_SET (RNG, tm); fprintf (stderr, "# using rng '%s' seed=%ld\n", GHMM_RNG_NAME (RNG), tm); } n = 0; reject_os = reject_tmax = 0; while (n < seq_number) { /* Test: A new seed for each sequence */ /* ghmm_rng_timeseed(RNG); */ stillbadseq = badseq = 0; ARRAY_CALLOC (sq->seq[n], len); /* Get a random initial state i */ p = GHMM_RNG_UNIFORM (RNG); sum = 0.0; for (i = 0; i < mo->N; i++) { sum += mo->s[i].pi; if (sum >= p) break; } /* Get a random initial output m */ p = GHMM_RNG_UNIFORM (RNG); sum = 0.0; for (m = 0; m < mo->M; m++) { sum += mo->s[i].b[m]; if (sum >= p) break; } sq->seq[n][0] = m; state = 1; /* The first symbol chooses the start class */ class = mo->get_class (sq->seq[n], state); /*class = ghmm_cseq_class(&dummy, 0, &osum); */ /* dummy function */ while (state < len) { /* Get a new state */ p = GHMM_RNG_UNIFORM (RNG); sum = 0.0; for (j = 0; j < mo->s[i].out_states; j++) { sum += mo->s[i].out_a[class][j]; if (sum >= p) break; } if (sum == 0.0) { if (mo->s[i].out_states > 0) { /* Repudiate the sequence, if all smo->s[i].out_a[class][.] == 0, that is, class "class" isn't used in the original data: go out of the while-loop, n should not be counted. */ /* printf("Zustand %d, class %d, len %d out_states %d \n", i, class, state, smo->s[i].out_states); */ badseq = 1; /* break; */ /* Try: If the class is "empty", try the neighbour class; first, sweep down to zero; if still no success, sweep up to COS - 1. If still no success --> Repudiate the sequence. */ if (class > 0 && up == 0) { class--; continue; } else if (class < mo->cos - 1) { class++; up = 1; continue; } else { stillbadseq = 1; break; } } else /* An end state is reached, get out of the while-loop */ break; } i = mo->s[i].out_id[j]; /* Get a random output m from state i */ p = GHMM_RNG_UNIFORM (RNG); sum = 0.0; for (m = 0; m < mo->M; m++) { sum += mo->s[i].b[m]; if (sum >= p) break; } sq->seq[n][state] = m; /* Decide the class for the next step */ class = mo->get_class (sq->seq[n], state); /*class = ghmm_cseq_class(&dummy, state, &osum); */ /* dummy */ up = 0; state++; } /* while (state < len) , global_len depends on the data */ if (badseq) { reject_os_tmp++; } if (stillbadseq) { reject_os++; m_free (sq->seq[n]); /* printf("cl %d, s %d, %d\n", class, i, n); */ } else if (state > Tmax) { reject_tmax++; m_free (sq->seq[n]); } else { if (state < len) ARRAY_REALLOC (sq->seq[n], state); sq->seq_len[n] = state; /* ighmm_cvector_print(stdout, sq->seq[n], sq->seq_len[n]," "," ",""); */ n++; } /* printf("reject_os %d, reject_tmax %d\n", reject_os, reject_tmax); */ if (reject_os > 10000) { GHMM_LOG(LCONVERTED, "Reached max. no. of rejections\n"); break; } if (!(n % 1000)) printf ("%d Seqs. generated\n", n); } /* n-loop */ if (reject_os > 0) printf ("%d sequences rejected (os)!\n", reject_os); if (reject_os_tmp > 0) printf ("%d sequences changed class\n", reject_os_tmp - reject_os); if (reject_tmax > 0) printf ("%d sequences rejected (Tmax, %d)!\n", reject_tmax, Tmax); return (sq);STOP: /* Label STOP from ARRAY_[CM]ALLOC */ ghmm_dseq_free (&sq); return (NULL);# undef CUR_PROC} /* data */#endifint ghmm_dsmodel_init_silent_states (ghmm_dsmodel * mo){#define CUR_PROC "ghmm_dsmodel_init_silent_states" int nSilentStates = 0; int i, m;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -