model.c
来自「General Hidden Markov Model Library 一个通用」· C语言 代码 · 共 2,232 行 · 第 1/5 页
C
2,232 行
if (fix_read) { ighmm_scanner_error (s, "identifier fix_state twice"); goto STOP; } ighmm_scanner_get_name (s); if (!strcmp (s->id, "vector")) { ighmm_scanner_consume (s, '{'); if (s->err) goto STOP; fix_vector = scanner_get_int_array (s, &len); if (len != mo->N) { ighmm_scanner_error (s, "wrong number of elements in fix_state"); goto STOP; } ighmm_scanner_consume (s, ';'); if (s->err) goto STOP; ighmm_scanner_consume (s, '}'); if (s->err) goto STOP; fix_read = 1; } else { ighmm_scanner_error (s, "unknown identifier"); goto STOP; } } else { ighmm_scanner_error (s, "unknown identifier"); goto STOP; } ighmm_scanner_consume (s, ';'); if (s->err) goto STOP; } /* while(..s->c-'}') */ ighmm_scanner_consume (s, '}'); if (s->err) goto STOP; /* No prior read --> give it the value -1 */ if (prior_read == 0) mo->prior = -1; /* Allocate memory for the model */ for (i = 0; i < mo->N; i++) { mo->s[i].out_states = ighmm_cmatrix_notzero_columns (a_matrix, i, mo->N); mo->s[i].in_states = ighmm_cmatrix_notzero_rows (a_matrix, i, mo->N); if (model_state_alloc (mo->s + i, mo->M, mo->s[i].in_states, mo->s[i].out_states)) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } /* Assign the parameters to the model */ if (!a_matrix) { fprintf (stderr, "no A matrix specified in file!\n"); exit (1); } if (!b_matrix) { fprintf (stderr, "no B matrix specified in file!\n"); exit (1); } if (!fix_vector) { fprintf (stderr, "no fix_state vector specified in file!\n"); exit (1); } if (!pi_vector) { fprintf (stderr, "no Pi vector specified in file!\n"); exit (1); } if (model_copy_vectors (mo, i, a_matrix, b_matrix, pi_vector, fix_vector)) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } } ighmm_cmatrix_free (&a_matrix, mo->N); ighmm_cmatrix_free (&b_matrix, mo->N); m_free (pi_vector); return (mo);STOP: /* Label STOP from ARRAY_[CM]ALLOC */ ighmm_cmatrix_free (&a_matrix, mo->N); ighmm_cmatrix_free (&b_matrix, mo->N); m_free (pi_vector); ghmm_dmodel_free(&mo); return NULL;#undef CUR_PROC} /* ghmm_dmodel_direct_read */#endif /* GHMM_OBSOLETE *//*============================================================================*//* Produces models from given sequences */ghmm_dmodel **ghmm_dmodel_from_sequence (ghmm_dseq * sq, long *mo_number){#define CUR_PROC "ghmm_dmodel_from_sequence" long i; int max_symb; ghmm_dmodel **mo = NULL; ARRAY_CALLOC (mo, sq->seq_number); max_symb = ghmm_dseq_max_symbol (sq); for (i = 0; i < sq->seq_number; i++) mo[i] = ghmm_dmodel_generate_from_sequence (sq->seq[i], sq->seq_len[i], max_symb + 1); *mo_number = sq->seq_number; return mo;STOP: /* Label STOP from ARRAY_[CM]ALLOC */ for (i = 0; i < *mo_number; i++) ghmm_dmodel_free(&(mo[i])); return NULL;#undef CUR_PROC} /* ghmm_dmodel_from_sequence */#ifdef GHMM_OBSOLETE/*============================================================================*//* Produces models form given sequences */ghmm_dmodel **ghmm_dmodel_from_sequence_ascii (scanner_t * s, long *mo_number){#define CUR_PROC "ghmm_dmodel_from_sequence_ascii" long i; int max_symb; ghmm_dmodel **mo = NULL; ghmm_dseq *sq = NULL; ighmm_scanner_consume (s, '{'); if (s->err) goto STOP; while (!s->err && !s->eof && s->c - '}') { ighmm_scanner_get_name (s); ighmm_scanner_consume (s, '='); if (s->err) goto STOP; /* Reads sequences on normal format */ if (!strcmp (s->id, "SEQ")) { sq = ghmm_dseq_read_alloc (s); if (!sq) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } } else { ighmm_scanner_error (s, "unknown identifier"); goto STOP; } ighmm_scanner_consume (s, ';'); if (s->err) goto STOP; } /* while(..s->c-'}') */ ighmm_scanner_consume (s, '}'); if (s->err) goto STOP; ARRAY_CALLOC (mo, sq->seq_number); /* The biggest symbol that occurs */ max_symb = ghmm_dseq_max_symbol (sq); for (i = 0; i < sq->seq_number; i++) mo[i] = ghmm_dmodel_generate_from_sequence (sq->seq[i], sq->seq_len[i], max_symb + 1); *mo_number = sq->seq_number; ghmm_dseq_free (&sq); return mo;STOP: /* Label STOP from ARRAY_[CM]ALLOC */ ghmm_dseq_free (&sq); for (i = 0; i < *mo_number; i++) ghmm_dmodel_free(&(mo[i])); return NULL;#undef CUR_PROC} /* ghmm_dmodel_from_sequence_ascii */#endif /* GHMM_OBSOLETE *//*============================================================================*/int ghmm_dmodel_free(ghmm_dmodel ** mo) {#define CUR_PROC "ghmm_dmodel_free" int i; mes_check_ptr (mo, return (-1)); mes_check_ptr (*mo, return (-1)); for (i=0; i < (*mo)->N && (*mo)->s; i++) ghmm_dstate_clean(&(*mo)->s[i]); if ((*mo)->s) m_free((*mo)->s); if ((*mo)->name) m_free((*mo)->name); if ((*mo)->model_type & GHMM_kSilentStates) { if ((*mo)->topo_order) m_free((*mo)->topo_order); m_free((*mo)->silent); } if ((*mo)->model_type & GHMM_kTiedEmissions) m_free((*mo)->tied_to); if ((*mo)->pow_lookup) m_free((*mo)->pow_lookup); if ((*mo)->model_type & GHMM_kBackgroundDistributions) m_free((*mo)->background_id); if ((*mo)->model_type & GHMM_kHigherOrderEmissions) m_free((*mo)->order); if ((*mo)->model_type & GHMM_kLabeledStates) m_free((*mo)->label); m_free(*mo); return (0);#undef CUR_PROC} /* ghmm_dmodel_free */int ghmm_dbackground_free (ghmm_dbackground * bg){#define CUR_PROC "ghmm_dbackground_free" if (bg->order) m_free(bg->order); if (bg->b) ighmm_cmatrix_free(&(bg->b), bg->n); m_free (bg); return (0);#undef CUR_PROC}/*============================================================================*/ghmm_dmodel *ghmm_dmodel_copy (const ghmm_dmodel * mo){# define CUR_PROC "ghmm_dmodel_copy" int i, j, nachf, vorg, m, size; ghmm_dmodel *m2 = NULL; ARRAY_CALLOC (m2, 1); ARRAY_CALLOC (m2->s, mo->N); if (mo->model_type & GHMM_kSilentStates) ARRAY_CALLOC (m2->silent, mo->N); if (mo->model_type & GHMM_kTiedEmissions) ARRAY_CALLOC (m2->tied_to, mo->N); if (mo->model_type & GHMM_kBackgroundDistributions) { ARRAY_CALLOC (m2->background_id, mo->N); m2->bp = mo->bp; } if (mo->model_type & GHMM_kHigherOrderEmissions) ARRAY_CALLOC (m2->order, mo->N); if (mo->model_type & GHMM_kLabeledStates) ARRAY_CALLOC (m2->label, mo->N); ARRAY_MALLOC(m2->pow_lookup, mo->maxorder+2); for (i = 0; i < mo->N; i++) { if (mo->model_type & GHMM_kHigherOrderEmissions) size = ghmm_ipow(mo, mo->M, mo->order[i]+1); else size = mo->M; nachf = mo->s[i].out_states; vorg = mo->s[i].in_states; ARRAY_CALLOC (m2->s[i].out_id, nachf); ARRAY_CALLOC (m2->s[i].out_a, nachf); ARRAY_CALLOC (m2->s[i].in_id, vorg); ARRAY_CALLOC (m2->s[i].in_a, vorg); ARRAY_CALLOC (m2->s[i].b, size); /* copy the values */ for (j = 0; j < nachf; j++) { m2->s[i].out_a[j] = mo->s[i].out_a[j]; m2->s[i].out_id[j] = mo->s[i].out_id[j]; } for (j = 0; j < vorg; j++) { m2->s[i].in_a[j] = mo->s[i].in_a[j]; m2->s[i].in_id[j] = mo->s[i].in_id[j]; } /* copy all b values for higher order states */ for (m = 0; m < size; m++) m2->s[i].b[m] = mo->s[i].b[m]; m2->s[i].pi = mo->s[i].pi; m2->s[i].fix = mo->s[i].fix; if (mo->model_type & GHMM_kSilentStates) m2->silent[i] = mo->silent[i]; if (mo->model_type & GHMM_kTiedEmissions) m2->tied_to[i] = mo->tied_to[i]; if (mo->model_type & GHMM_kLabeledStates) m2->label[i] = mo->label[i]; if (mo->model_type & GHMM_kHigherOrderEmissions) m2->order[i] = mo->order[i]; if (mo->model_type & GHMM_kBackgroundDistributions) m2->background_id[i] = mo->background_id[i]; m2->s[i].out_states = nachf; m2->s[i].in_states = vorg; } m2->N = mo->N; m2->M = mo->M; m2->prior = mo->prior; if (mo->model_type & GHMM_kHigherOrderEmissions) { m2->maxorder = mo->maxorder; for (i=0; i < mo->maxorder+2; i++) m2->pow_lookup[i] = mo->pow_lookup[i]; } m2->model_type = mo->model_type; /* not necessary but the history is at least initialised */ m2->emission_history = mo->emission_history; return (m2);STOP: /* Label STOP from ARRAY_[CM]ALLOC */ ghmm_dmodel_free (&m2); return (NULL);# undef CUR_PROC} /* ghmm_dmodel_copy *//*============================================================================*/int ghmm_dmodel_check(const ghmm_dmodel * mo) {# define CUR_PROC "ghmm_dmodel_check" int res = -1; double sum; int i, j, imag=0; char *str; /* The sum of the Pi[i]'s is 1 */ sum = 0.0; for (i = 0; i < mo->N; i++) sum += mo->s[i].pi; if (fabs(sum-1.0) >= GHMM_EPS_PREC) { GHMM_LOG(LERROR, "sum Pi[i] != 1.0\n"); goto STOP; } /* check each state */ for (i=0; i<mo->N; i++) { sum = 0.0; if (mo->s[i].out_states == 0) { str = ighmm_mprintf(NULL, 0, "out_states = 0 (state %d -> final state!)", i); GHMM_LOG(LDEBUG, str); m_free(str) } /* Sum the a[i][j]'s : normalized out transitions */ for (j=0; j < mo->s[i].out_states; j++) sum += mo->s[i].out_a[j]; if (j>0 && fabs(sum-1.0) >= GHMM_EPS_PREC) { str = ighmm_mprintf(NULL, 0, "sum of s[%d].out_a[*] (%g) not equal 1.0", i, sum); GHMM_LOG(LWARN, str); m_free(str); goto STOP; } /* Sum the a[i][j]'s : normalized in transitions */ sum = mo->s[i].pi; for (j=0; j<mo->s[i].in_states; j++) sum += mo->s[i].in_a[j]; if (fabs(sum) <= GHMM_EPS_PREC) { imag = 1; str = ighmm_mprintf(NULL, 0, "state %d can't be reached", i); GHMM_LOG(LINFO, str); m_free(str); } /* Sum the b[j]'s: normalized emission probs */ sum = 0.0; for (j=0; j<mo->M; j++) sum += mo->s[i].b[j]; if (imag) { /* not reachable states */ if ((fabs(sum + mo->M ) >= GHMM_EPS_PREC)) { str = ighmm_mprintf(NULL, 0, "state %d can't be reached but is not set" " as non-reachale state", i); GHMM_LOG(LWARN, str); m_free(str); goto STOP; } } else if ((mo->model_type & GHMM_kSilentStates) && mo->silent[i]) { /* silent states */ if (sum != 0.0) { str = ighmm_mprintf(NULL, 0, "state %d is silent but has a non-zero" " emission probability", i); GHMM_LOG(LWARN, str); m_free(str); goto STOP; } } else { /* normal states */ if (fabs(sum-1.0) >= GHMM_EPS_PREC) { str = ighmm_mprintf(NULL, 0, "sum s[%d].b[*] = %f != 1.0", i, sum); GHMM_LOG(LCONVERTED, str); m_free(str); goto STOP; } } } res = 0;STOP: /* Label STOP from ARRAY_[CM]ALLOC */ return (res);# undef CUR_PROC} /* ghmm_dmodel_check *//*============================================================================*/int ghmm_dmodel_check_compatibility (ghmm_dmodel ** mo, int model_number){#define CUR_PROC "ghmm_dmodel_check_compatibility" int i; for (i = 1; i < model_number; i++) if (-1 == ghmm_dmodel_check_compatibel_models (mo[0], mo[i])) return -1; return 0;#undef CUR_PROC}/*============================================================================*/int ghmm_dmodel_check_compatibel_models (const ghmm_dmodel * mo, const ghmm_dmodel * m2){#define CUR_PROC "ghmm_dmodel_check_compatibel_models" int i, j; char *str; if (mo->N != m2->N) { str = ighmm_mprintf(NULL, 0, "ERROR: different number of states (%d != %d)\n", mo->N, m2->N); goto STOP; } if (mo->M != m2->M) { str = ighmm_mprintf(NULL, 0, "ERROR: different number of possible outputs (%d != %d)\n", mo->M, m2->M); goto STOP; } for (i=0; i<mo->N; ++i) { if (mo->s[i].out_states != m2->s[i].out_states) { str = ighmm_mprintf(NULL, 0, "ERROR: different number of outstates (%d != %d) in state %d.\n", mo->s[i].out_states, m2->s[i].out_states, i); goto STOP; } for (j=0; j<mo->s[i].out_states; ++j) { if (mo->s[i].out_id[j] != m2->s[i].out_id[j]) { str = ighmm_mprintf(NULL, 0, "ERROR: different out_ids (%d != %d) in entry %d of state %d.\n", mo->s[i].out_id[j], m2->s[i].out_id[j], j, i); goto STOP; } } } return 0;STOP: GHMM_LOG(LCONVERTED, str); m_free (str);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?