📄 model.c
字号:
/******************************************************************************* author : Bernd Wichern filename : /zpr/bspk/src/hmm/ghmm/ghmm/model.c created : TIME: 10:47:27 DATE: Fri 19. December 1997 $Id: model.c,v 1.21 2004/04/07 09:43:29 cic99 Exp $Copyright (C) 1998-2001, ZAIK/ZPR, Universit鋞 zu K鰈nThis program is free software; you can redistribute it and/or modifyit under the terms of the GNU General Public License as published bythe Free Software Foundation; either version 2 of the License, or(at your option) any later version.This program is distributed in the hope that it will be useful,but WITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See theGNU General Public License for more details.You should have received a copy of the GNU General Public Licensealong with this program; if not, write to the Free SoftwareFoundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*******************************************************************************/#include <float.h>#include <math.h>#include "model.h"#include "matrix.h"#include "sequence.h"#include "const.h"#include "rng.h"#include "foba.h"#include "mes.h"#include "mprintf.h"#include "string.h"#include "ghmm.h"#include "modelutil.h"#define __EPS 10e-6typedef enum DFSFLAG { DONE, NOTVISITED, VISITED } DFSFLAG;/*typedef struct local_store_t { DFSFLAG *colors; int *topo_order; int topo_order_length;} local_store_t;static local_store_t *topo_alloc(model *mo, int len);static int topo_free(local_store_t **v, int n, int cos, int len); *//*----------------------------------------------------------------------------*/static int model_state_alloc(state *state, int M, int in_states, int out_states) {# define CUR_PROC "model_state_alloc" int res = -1; if(!m_calloc(state->b, M)) {mes_proc(); goto STOP;} if (out_states > 0) { if(!m_calloc(state->out_id, out_states)) {mes_proc(); goto STOP;} if(!m_calloc(state->out_a, out_states)) {mes_proc(); goto STOP;} } if (in_states > 0) { if(!m_calloc(state->in_id, in_states)) {mes_proc(); goto STOP;} if(!m_calloc(state->in_a, in_states)) {mes_proc(); goto STOP;} } res = 0;STOP: return(res);# undef CUR_PROC} /* model_state_alloc *//*----------------------------------------------------------------------------*/static int model_copy_vectors(model *mo, int index, double **a_matrix, double **b_matrix, double *pi, int *fix) {#define CUR_PROC "model_copy_vectors" int i, cnt_out = 0, cnt_in = 0; mo->s[index].pi = pi[index]; mo->s[index].fix = fix[index]; for (i = 0; i < mo->M; i++) mo->s[index].b[i] = b_matrix[index][i]; for (i = 0; i < mo->N; i++) { if (a_matrix[index][i]) { /* Transitions to a following state possible */ if (cnt_out >= mo->s[index].out_states) {mes_proc(); return(-1);} mo->s[index].out_id[cnt_out] = i; mo->s[index].out_a[cnt_out] = a_matrix[index][i]; cnt_out++; } if (a_matrix[i][index]) { /* Transitions to a previous state possible */ if (cnt_in >= mo->s[index].in_states) {mes_proc(); return(-1);} mo->s[index].in_id[cnt_in] = i; mo->s[index].in_a[cnt_in] = a_matrix[i][index]; cnt_in++; } } return(0);#undef CUR_PROC} /* model_copy_vectors *//*============================================================================*//* Old prototype:model **model_read(char *filename, int *mo_number, int **seq, const int *seq_len, int seq_number) { */model **model_read(char *filename, int *mo_number) {#define CUR_PROC "model_read" int j; long new_models = 0; scanner_t *s = NULL; model **mo = NULL; *mo_number = 0; s = scanner_alloc(filename); if(!s) {mes_proc(); goto STOP;} while(!s->err && !s->eof) { scanner_get_name(s); scanner_consume(s, '='); if(s->err) goto STOP; if (!strcmp(s->id, "HMM") || !strcmp(s->id, "hmm")) { (*mo_number)++; /* more mem */ if (m_realloc(mo, *mo_number)) { mes_proc(); goto STOP; } mo[*mo_number - 1] = model_direct_read(s, (int *) &new_models); if (!mo[*mo_number - 1]) { mes_proc(); goto STOP; } /* Copies the model, that has already been read. */ if (new_models > 1) { /* "-1" because mo_number++ has already been done. */ if (m_realloc(mo, *mo_number - 1 + new_models)) { mes_proc(); goto STOP; } for (j = 1; j < new_models; j++) { mo[*mo_number] = model_copy(mo[*mo_number - 1]); if (!mo[*mo_number]) { mes_proc(); goto STOP; } (*mo_number)++; } } } else if (!strcmp(s->id, "HMM_SEQ")) { model **tmp_mo = NULL; tmp_mo = model_from_sequence_ascii(s, &new_models); if (m_realloc(mo, (*mo_number + new_models))) { mes_proc(); goto STOP; } for (j = 0; j < new_models; j++) { if (!tmp_mo[j]) { mes_proc(); goto STOP; } mo[*mo_number] = tmp_mo[j]; (*mo_number)++; } } else { scanner_error(s, "unknown identifier"); goto STOP; } scanner_consume(s, ';'); if(s->err) goto STOP; } /* while(!s->err && !s->eof) */ return mo;STOP: return NULL;#undef CUR_PROC} /* model_read *//*============================================================================*/model *model_direct_read(scanner_t *s, int *multip){#define CUR_PROC "model_direct_read" int i, m_read, n_read, a_read, b_read, pi_read, len, prior_read, fix_read; model *mo = NULL; double **a_matrix = NULL, **b_matrix = NULL; double *pi_vector = NULL; int *fix_vector = NULL; m_read = n_read = a_read = b_read = pi_read = prior_read = fix_read = 0; *multip = 1; /* default */ if (!(m_calloc(mo, 1))) { mes_proc(); goto STOP; } scanner_consume( s, '{' ); if(s->err) goto STOP; while(!s->err && !s->eof && s->c - '}') { scanner_get_name(s); if (strcmp(s->id, "M") && strcmp(s->id, "N") && strcmp(s->id, "Pi") && strcmp(s->id, "A") && strcmp(s->id, "B") && strcmp(s->id, "multip") && strcmp(s->id, "prior") && strcmp(s->id, "fix_state")) { scanner_error(s, "unknown identifier"); goto STOP; } scanner_consume(s, '='); if(s->err) goto STOP; if (!strcmp(s->id, "multip")) { *multip = scanner_get_int(s); if (*multip < 1) {/* Doesn't make any sense! */ *multip = 1; mes_prot("Multip < 1 ignored\n"); } } else if (!strcmp(s->id, "M")) {/*Number of output values*/ if (m_read) {scanner_error(s, "identifier M twice"); goto STOP;} mo->M = scanner_get_int(s); m_read = 1; } else if (!strcmp(s->id, "N")) {/*Number of states*/ if (n_read) {scanner_error(s, "identifier N twice"); goto STOP;} mo->N = scanner_get_int(s); if (!m_calloc(mo->s, mo->N)) {mes_proc(); goto STOP;} n_read = 1; } else if (!strcmp(s->id, "A")) {/*Transition probability*/ if (!n_read) {scanner_error(s, "need N as a range for A"); goto STOP;} if (a_read) {scanner_error(s, "identifier A twice"); goto STOP;} if (!m_calloc(a_matrix, mo->N)) {mes_proc(); goto STOP;} scanner_get_name(s); if (!strcmp(s->id, "matrix")) { if (matrix_d_read(s, a_matrix, mo->N, mo->N)){ scanner_error(s, "unable to read matrix A"); goto STOP; } a_read = 1; } else { scanner_error(s, "unknown identifier"); goto STOP; } } else if (!strcmp(s->id, "B")) {/*Output probability*/ if ((!n_read)||(!m_read)){ scanner_error(s, "need M and N as a range for B"); goto STOP; } if (b_read) {scanner_error(s, "identifier B twice"); goto STOP;} if (!m_calloc(b_matrix, mo->N)) {mes_proc(); goto STOP;} scanner_get_name(s); if (!strcmp(s->id, "matrix")) { if(matrix_d_read(s, b_matrix, mo->N, mo->M)) { scanner_error(s, "unable to read matrix B"); goto STOP; } b_read = 1; } else { scanner_error(s, "unknown identifier"); goto STOP; } } else if (!strcmp(s->id, "prior")) {/*A prior model*/ if (prior_read) {scanner_error(s,"identifier prior twice");goto STOP;} mo->prior = scanner_get_edouble(s); if ((mo->prior < 0 || mo->prior > 1) && mo->prior != -1) { scanner_error(s, "invalid model prior"); goto STOP; } prior_read = 1; } else if (!strcmp(s->id, "Pi")) {/*Initial state probabilty*/ if (!n_read) {scanner_error(s, "need N as a range for Pi"); goto STOP;} if (pi_read) {scanner_error(s, "identifier Pi twice"); goto STOP;} scanner_get_name(s); if (!strcmp(s->id, "vector")) { scanner_consume( s, '{' ); if(s->err) goto STOP; pi_vector = scanner_get_double_earray(s, &len); if (len != mo->N) {scanner_error(s, "wrong number of elements in PI"); goto STOP;} scanner_consume( s, ';' ); if(s->err) goto STOP; scanner_consume( s, '}' ); if(s->err) goto STOP; pi_read = 1; } else { scanner_error(s, "unknown identifier"); goto STOP; } } else if (!strcmp(s->id, "fix_state")) { if (!n_read) {scanner_error(s, "need N as a range for fix_state"); goto STOP;} if (fix_read) {scanner_error(s, "identifier fix_state twice"); goto STOP;} scanner_get_name(s); if (!strcmp(s->id, "vector")) { scanner_consume( s, '{' ); if(s->err) goto STOP; fix_vector = scanner_get_int_array(s, &len); if (len != mo->N) {scanner_error(s, "wrong number of elements in fix_state"); goto STOP;} scanner_consume( s, ';' ); if(s->err) goto STOP; scanner_consume( s, '}' ); if(s->err) goto STOP; fix_read = 1; } else { scanner_error(s, "unknown identifier"); goto STOP; } } else { scanner_error(s, "unknown identifier"); goto STOP; } scanner_consume( s, ';' ); if(s->err) goto STOP; } /* while(..s->c-'}') */ scanner_consume( s, '}' ); if(s->err) goto STOP; /* No prior read --> give it the value -1 */ if (prior_read == 0) mo->prior = -1; /* Allocate memory for the model */ for (i = 0; i < mo->N; i++) { mo->s[i].out_states = matrix_d_notzero_columns(a_matrix, i, mo->N); mo->s[i].in_states = matrix_d_notzero_rows(a_matrix, i, mo->N); if (model_state_alloc(mo->s + i, mo->M, mo->s[i].in_states, mo->s[i].out_states)) { mes_proc(); goto STOP; } /* Assign the parameters to the model */ if (!a_matrix) { fprintf(stderr,"no A matrix specified in file!\n"); exit(1); } if (!b_matrix) { fprintf(stderr,"no B matrix specified in file!\n"); exit(1); } if (!fix_vector) { fprintf(stderr,"no fix_state vector specified in file!\n"); exit(1); } if (!pi_vector) { fprintf(stderr,"no Pi vector specified in file!\n"); exit(1); } if(model_copy_vectors(mo, i, a_matrix, b_matrix, pi_vector, fix_vector)) { mes_proc(); goto STOP; } } matrix_d_free(&a_matrix, mo->N); matrix_d_free(&b_matrix, mo->N ); m_free(pi_vector); return(mo);STOP: matrix_d_free(&a_matrix, mo->N); matrix_d_free(&b_matrix, mo->N); m_free(pi_vector); model_free(&mo); return NULL;#undef CUR_PROC} /* model_direct_read *//*============================================================================*//* Produces models from given sequences */model **model_from_sequence(sequence_t *sq, long *mo_number){#define CUR_PROC "model_from_sequence" long i; int max_symb; model **mo = NULL; if (!m_calloc(mo, sq->seq_number)) { mes_proc(); goto STOP; } max_symb = sequence_max_symbol(sq); for (i = 0; i < sq->seq_number; i++) mo[i] = model_generate_from_sequence(sq->seq[i], sq->seq_len[i], max_symb + 1); *mo_number = sq->seq_number; return mo; STOP: for (i = 0; i < *mo_number; i++) model_free(&(mo[i])); return NULL;#undef CUR_PROC} /* model_from_sequence *//*============================================================================*//* Produces models form given sequences */model **model_from_sequence_ascii(scanner_t *s, long *mo_number){#define CUR_PROC "model_from_sequence_ascii" long i; int max_symb; model **mo = NULL; sequence_t *sq = NULL; scanner_consume( s, '{' ); if(s->err) goto STOP; while(!s->err && !s->eof && s->c - '}') { scanner_get_name(s); scanner_consume(s, '='); if(s->err) goto STOP; /* Reads sequences on normal format */ if (!strcmp(s->id, "SEQ")) { sq = sequence_read_alloc(s); if (!sq) { mes_proc(); goto STOP; } } else { scanner_error(s, "unknown identifier"); goto STOP; } scanner_consume( s, ';' ); if(s->err) goto STOP; } /* while(..s->c-'}') */ scanner_consume( s, '}' ); if(s->err) goto STOP; if (!m_calloc(mo, sq->seq_number)) { mes_proc(); goto STOP; } /* The biggest symbol that occurs */ max_symb = sequence_max_symbol(sq); for (i = 0; i < sq->seq_number; i++) mo[i] = model_generate_from_sequence(sq->seq[i], sq->seq_len[i], max_symb + 1); *mo_number = sq->seq_number; sequence_free(&sq); return mo; STOP: sequence_free(&sq); for (i = 0; i < *mo_number; i++) model_free(&(mo[i])); return NULL;#undef CUR_PROC} /* model_from_sequence_ascii *//*============================================================================*/int model_free(model **mo) {#define CUR_PROC "model_free" int i; mes_check_ptr(mo, return(-1)); if( !*mo ) return(0); for (i = 0; i < (*mo)->N; i++) state_clean(&(*mo)->s[i]); if ( (*mo)->s) m_free((*mo)->s); if ((*mo) -> silent) m_free((*mo)->silent); if ((*mo) -> tied_to) m_free((*mo)->tied_to); /*if ((*mo) -> emission_order) Moved to state m_free((*mo)->emission_order);*/ if ((*mo) -> topo_order) m_free((*mo)->topo_order); m_free(*mo); return(0);#undef CUR_PROC} /* model_free */ /*===========================================================================int model_free(model **mo) {#define CUR_PROC "sdmodel_free" state *my_state; int i; mes_check_ptr(mo, return(-1)); if( !*mo ) return(0); for (i = 0; i < (*mo)->N; i++) { my_state = &((*mo)->s[i]); if (my_state->b) m_free(my_state->b); if (my_state->out_id) m_free(my_state->out_id); if (my_state->in_id) m_free(my_state->in_id); if (my_state->out_a) m_free(my_state->out_a); if (my_state->in_a) m_free(my_state->in_a); /*if (my_state->out_a) matrix_d_free(&((*mo)->s[i].out_a), (*mo)->cos); if (my_state->in_a) matrix_d_free(&((*mo)->s[i].in_a), (*mo)->cos); printf("Free every thing set it to NULL\n"); my_state->pi = 0; my_state->b = NULL; my_state->out_id = NULL; my_state->in_id = NULL; my_state->out_a = NULL; my_state->in_a = NULL; my_state->out_states = 0; my_state->in_states = 0; my_state->fix = 0; } m_free((*mo)->s); m_free(*mo); return(0);#undef CUR_PROC} /* model_free */ /*============================================================================*/model *model_copy(const model *mo) {# define CUR_PROC "model_copy" int i, j, nachf, vorg, m; model *m2 = NULL; if(!m_calloc(m2, 1)) {mes_proc(); goto STOP;} if (!m_calloc(m2->s, mo->N)) {mes_proc(); goto STOP;} for (i = 0; i < mo->N; i++) { nachf = mo->s[i].out_states; vorg = mo->s[i].in_states; if(!m_calloc(m2->s[i].out_id, nachf)) {mes_proc(); goto STOP;} if(!m_calloc(m2->s[i].out_a, nachf)) {mes_proc(); goto STOP;} if(!m_calloc(m2->s[i].in_id, vorg)) {mes_proc(); goto STOP;} if(!m_calloc(m2->s[i].in_a, vorg)) {mes_proc(); goto STOP;} if(!m_calloc(m2->s[i].b, mo->M)) {mes_proc(); goto STOP;} /* Copy the values */ for (j = 0; j < nachf; j++) { m2->s[i].out_a[j] = mo->s[i].out_a[j]; m2->s[i].out_id[j] = mo->s[i].out_id[j]; } for (j = 0; j < vorg; j++) { m2->s[i].in_a[j] = mo->s[i].in_a[j]; m2->s[i].in_id[j] = mo->s[i].in_id[j]; } for (m = 0; m < mo->M; m++) m2->s[i].b[m] = mo->s[i].b[m]; m2->s[i].pi = mo->s[i].pi; m2->s[i].out_states = nachf; m2->s[i].in_states = vorg; } m2->N = mo->N; m2->M = mo->M; m2->prior = mo->prior; return(m2);STOP: model_free(&m2); return(NULL);# undef CUR_PROC} /* model_copy *//*============================================================================*/int model_check(const model* mo) {# define CUR_PROC "model_check" int res = -1; double sum; int i,j; char *str; /* The sum of the Pi[i]'s is 1 */ sum = 0.0; for (i = 0; i < mo->N; i++) { sum += mo->s[i].pi; } if ( fabs(sum - 1.0) >= EPS_PREC ) { mes_prot("sum Pi[i] != 1.0\n"); goto STOP; } /* check each state */ for (i = 0; i < mo->N; i++) { sum = 0.0; if (mo->s[i].out_states == 0) { char *str = mprintf(NULL,0,"out_states = 0 (state %d -> final state!)\n",i); mes_prot(str); } /* Sum the a[i][j]'s : normalized out transitions */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -