model.c
来自「General Hidden Markov Model Library 一个通用」· C语言 代码 · 共 2,232 行 · 第 1/5 页
C
2,232 行
return (-1);#undef CUR_PROC} /* ghmm_dmodel_check_compatibility *//*============================================================================*/ghmm_dmodel *ghmm_dmodel_generate_from_sequence (const int *seq, int seq_len, int anz_symb){#define CUR_PROC "ghmm_dmodel_generate_from_sequence" int i; ghmm_dmodel *mo = NULL; ghmm_dstate *s = NULL; ARRAY_CALLOC (mo, 1); mo->N = seq_len; mo->M = anz_symb; /* All models generated from sequences have to be LeftRight-models */ mo->model_type = GHMM_kLeftRight; /* Allocate memory for all vectors */ ARRAY_CALLOC (mo->s, mo->N); for (i = 0; i < mo->N; i++) { if (i == 0) { /* Initial state */ if (model_state_alloc (mo->s, mo->M, 0, 1)) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } } else if (i == mo->N - 1) { /* End state */ if (model_state_alloc (mo->s + i, mo->M, 1, 0)) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } } else { /* others */ if (model_state_alloc (mo->s + i, mo->M, 1, 1)) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } } } /* Allocate states with the right values, the initial state and the end state extra */ for (i = 1; i < mo->N - 1; i++) { s = mo->s + i; s->pi = 0.0; s->out_states = 1; s->in_states = 1; s->b[seq[i]] = 1.0; /* others stay 0 */ *(s->out_id) = i + 1; *(s->in_id) = i - 1; *(s->out_a) = *(s->in_a) = 1.0; } /* Initial state */ s = mo->s; s->pi = 1.0; s->out_states = 1; s->in_states = 0; s->b[seq[0]] = 1.0; *(s->out_id) = 1; *(s->out_a) = 1.0; /* No in_id and in_a */ /* End state */ s = mo->s + mo->N - 1; s->pi = 0.0; s->out_states = 0; s->in_states = 1; s->b[seq[mo->N - 1]] = 1.0; /* All other b's stay zero */ *(s->in_id) = mo->N - 2; *(s->in_a) = 1.0; /* No out_id and out_a */ if (ghmm_dmodel_check (mo)) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } return mo;STOP: /* Label STOP from ARRAY_[CM]ALLOC */ ghmm_dmodel_free (&mo); return NULL;#undef CUR_PROC} /* ghmm_dmodel_generate_from_sequence *//*===========================================================================*/ static int get_random_output (ghmm_dmodel * mo, int i, int position){#define CUR_PROC "get_random_output" int m, e_index; double p, sum=0.0; p = GHMM_RNG_UNIFORM (RNG); for (m = 0; m < mo->M; m++) { /* get the right index for higher order emission models */ e_index = get_emission_index(mo, i, m, position); /* get the probability, exit, if the index is -1 */ if (-1 != e_index) { sum += mo->s[i].b[e_index]; if (sum >= p) break; } else { fprintf (stderr, "ERROR: State has order %d, but in the history are only %d emissions.\n", mo->order[i], position); return -1; } } if (mo->M == m) { fprintf (stderr, "ERROR: no valid output choosen. Are the Probabilities correct? sum: %g, p: %g\n", sum, p); return -1; } return (m);#undef CUR_PROC} /* get_random_output *//*============================================================================*/ghmm_dseq *ghmm_dmodel_generate_sequences(ghmm_dmodel* mo, int seed, int global_len, long seq_number, int Tmax){#define CUR_PROC "ghmm_dmodel_generate_sequences" ghmm_dseq *sq = NULL; int state; int j, m, j_id; double p, sum, max_sum; int len = global_len; int n = 0; int pos, label_pos; sq = ghmm_dseq_calloc(seq_number); if (!sq) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } /* allocating additional fields for the state sequence in the ghmm_dseq struct */ ARRAY_CALLOC(sq->states, seq_number); ARRAY_CALLOC(sq->states_len, seq_number); /* A specific length of the sequences isn't given. As a model should have an end state, the konstant MAX_SEQ_LEN is used. */ if (len <= 0) len = (int)GHMM_MAX_SEQ_LEN; if (seed > 0) { GHMM_RNG_SET(RNG, seed); } /* initialize the emission history */ mo->emission_history = 0; while (n < seq_number) { ARRAY_CALLOC(sq->seq[n], len); /* for silent models we have to allocate for the maximal possible number of lables and states */ if (mo->model_type & GHMM_kSilentStates) { ARRAY_CALLOC(sq->states[n], len * mo->N); } else { ARRAY_CALLOC(sq->states[n], len); } pos = label_pos = 0; /* Get a random initial state i */ p = GHMM_RNG_UNIFORM(RNG); sum = 0.0; for (state=0; state < mo->N; state++) { sum += mo->s[state].pi; if (sum >= p) break; } while (pos < len) { /* save the state path and label */ sq->states[n][label_pos] = state; label_pos++; /* Get a random output m if the state is not a silent state */ if (!(mo->model_type & GHMM_kSilentStates) || !(mo->silent[state])) { m = get_random_output(mo, state, pos); update_emission_history(mo, m); sq->seq[n][pos] = m; pos++; } /* get next state */ p = GHMM_RNG_UNIFORM(RNG); if (pos < mo->maxorder) { max_sum = 0.0; for (j = 0; j < mo->s[state].out_states; j++) { j_id = mo->s[state].out_id[j]; if (!(mo->model_type & GHMM_kHigherOrderEmissions) || mo->order[j_id] < pos) max_sum += mo->s[state].out_a[j]; } if (j && fabs(max_sum) < GHMM_EPS_PREC) { GHMM_LOG_PRINTF(LERROR, LOC, "No possible transition from state %d " "since all successor states require more history " "than seen up to position: %d.", state, pos); break; } if (j) p *= max_sum; } sum = 0.0; for (j = 0; j < mo->s[state].out_states; j++) { j_id = mo->s[state].out_id[j]; if (!(mo->model_type & GHMM_kHigherOrderEmissions) || mo->order[j_id] < pos) { sum += mo->s[state].out_a[j]; if (sum >= p) break; } } if (sum == 0.0) { GHMM_LOG_PRINTF(LINFO, LOC, "final state (%d) reached at position %d " "of sequence %d", state, pos, n); break; } state = j_id; } /* while (pos < len) */ /* realocate state path and label sequence to actual size */ if (mo->model_type & GHMM_kSilentStates) { ARRAY_REALLOC(sq->states[n], label_pos); } sq->seq_len[n] = pos; sq->states_len[n] = label_pos; n++; } /* while( n < seq_number ) */ return (sq);STOP: /* Label STOP from ARRAY_[CM]ALLOC */ ghmm_dseq_free(&sq); return NULL;#undef CUR_PROC}/*============================================================================*/double ghmm_dmodel_likelihood (ghmm_dmodel * mo, ghmm_dseq * sq){# define CUR_PROC "ghmm_dmodel_likelihood" double log_p_i, log_p; int found, i; char *str; /* printf("*** model_likelihood:\n"); */ found = 0; log_p = 0.0; for (i = 0; i < sq->seq_number; i++) {/* printf("sequence:\n"); *//* for (j=0;j < sq->seq_len[i];j++) { *//* printf("%d, ",sq->seq[i][j]); *//* } *//* printf("\n"); */ if (ghmm_dmodel_logp (mo, sq->seq[i], sq->seq_len[i], &log_p_i) == -1) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; }/* printf("\nlog_p_i = %f\n", log_p_i); */ if (log_p_i != +1) { log_p += log_p_i; found = 1; } else { str = ighmm_mprintf (NULL, 0, "sequence[%d] can't be build.\n", i); GHMM_LOG(LCONVERTED, str); } } if (!found) log_p = +1.0; return (log_p);STOP: /* Label STOP from ARRAY_[CM]ALLOC */ return -1;# undef CUR_PROC} /* ghmm_dmodel_likelihood */double ghmm_dmodel_get_transition(ghmm_dmodel* mo, int i, int j){# define CUR_PROC "ghmm_dmodel_get_transition" int out; if (mo->s && mo->s[i].out_a && mo->s[j].in_a) { for (out=0; out < mo->s[i].out_states; out++) { if (mo->s[i].out_id[out] == j) return mo->s[i].out_a[out]; } } return 0.0;# undef CUR_PROC} /* ghmm_dmodel_get_transition */void ghmm_dmodel_set_transition (ghmm_dmodel * mo, int i, int j, double prob){# define CUR_PROC "ghmm_dmodel_set_transition" int in, out; if (mo->s && mo->s[i].out_a && mo->s[j].in_a) { for (out = 0; out < mo->s[i].out_states; out++) { if (mo->s[i].out_id[out] == j) { mo->s[i].out_a[out] = prob; /* fprintf(stderr, CUR_PROC": State %d, %d, = %f\n", i, j, prob); */ break; } } for (in = 0; in < mo->s[j].in_states; in++) { if (mo->s[j].in_id[in] == i) { mo->s[j].in_a[in] = prob; break; } } }# undef CUR_PROC} /* ghmm_dmodel_set_transition *//*============================================================================*//* Some outputs *//*============================================================================*/void ghmm_dmodel_states_print (FILE * file, ghmm_dmodel * mo){ int i, j; fprintf (file, "Modelparameters: \n M = %d \t N = %d\n", mo->M, mo->N); for (i = 0; i < mo->N; i++) { fprintf (file, "\nState %d \n PI = %.3f \n out_states = %d \n in_states = %d \n", i, mo->s[i].pi, mo->s[i].out_states, mo->s[i].in_states); fprintf (file, " Output probability:\t"); for (j = 0; j < mo->M; j++) fprintf (file, "%.3f \t", mo->s[i].b[j]); fprintf (file, "\n Transition probability \n"); fprintf (file, " Out states (Id, a):\t"); for (j = 0; j < mo->s[i].out_states; j++) fprintf (file, "(%d, %.3f) \t", mo->s[i].out_id[j], mo->s[i].out_a[j]); fprintf (file, "\n"); fprintf (file, " In states (Id, a):\t"); for (j = 0; j < mo->s[i].in_states; j++) fprintf (file, "(%d, %.3f) \t", mo->s[i].in_id[j], mo->s[i].in_a[j]); fprintf (file, "\n"); }} /* ghmm_dmodel_states_print *//*============================================================================*/void ghmm_dmodel_A_print (FILE * file, ghmm_dmodel * mo, char *tab, char *separator, char *ending){ int i, j, out_state; for (i = 0; i < mo->N; i++) { out_state = 0; fprintf (file, "%s", tab); if (mo->s[i].out_states > 0 && mo->s[i].out_id[out_state] == 0) { fprintf (file, "%.2f", mo->s[i].out_a[out_state]); out_state++; } else fprintf (file, "0.00"); for (j = 1; j < mo->N; j++) { if (mo->s[i].out_states > out_state && mo->s[i].out_id[out_state] == j) { fprintf (file, "%s %.2f", separator, mo->s[i].out_a[out_state]); out_state++; } else fprintf (file, "%s 0.00", separator); } fprintf (file, "%s\n", ending); }} /* ghmm_dmodel_A_print *//*============================================================================*/void ghmm_dmodel_B_print (FILE * file, ghmm_dmodel * mo, char *tab, char *separator, char *ending){ int i, j, size; for (i = 0; i < mo->N; i++) { fprintf (file, "%s", tab); fprintf (file, "%.2f", mo->s[i].b[0]); if (!(mo->model_type & GHMM_kHigherOrderEmissions)) { for (j = 1; j < mo->M; j++) fprintf (file, "%s %.2f", separator, mo->s[i].b[j]); fprintf (file, "%s\n", ending); } else { size = ghmm_ipow (mo, mo->M, mo->order[i] + 1); for (j = 1; j < size; j++) fprintf (file, "%s %.2f", separator, mo->s[i].b[j]); fprintf (file, "%s\n", ending); } }} /* ghmm_dmodel_B_print *//*============================================================================*/void ghmm_dmodel_Pi_print (FILE * file, ghmm_dmodel * mo, char *tab, char *separator, char *ending){ int i; fprintf (file, "%s%.2f", tab, mo->s[0].pi); for (i = 1; i < mo->N; i++) fprintf (file, "%s %.2f", separator, mo->s[i].pi); fprintf (file, "%s\n", ending);} /* ghmm_dmodel_Pi_print */void ghmm_dmodel_fix_print (FILE * file, ghmm_dmodel * mo, char *tab, char *separator, char *ending){ int i; fprintf (file, "%s%d", tab, mo->s[0].fix); for (i = 1; i < mo->N; i++) fprintf (file, "%s %d", separator, mo->s[i].fix); fprintf (file, "%s\n", ending);} /* ghmm_dmodel_Pi_print *//*============================================================================*/void ghmm_dmodel_A_print_transp (FILE * file, ghmm_dmodel * mo, char *tab, char *separator, char *ending)
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?