smodel.c
来自「General Hidden Markov Model Library 一个通用」· C语言 代码 · 共 1,896 行 · 第 1/4 页
C
1,896 行
/*============================================================================*/ghmm_cseq *ghmm_cmodel_generate_sequences (ghmm_cmodel * smo, int seed, int global_len, long seq_number, long label, int Tmax){# define CUR_PROC "ghmm_cmodel_generate_sequences" /* An end state is characterized by not having an output probabiliy. */ ghmm_cseq *sq = NULL; int pos, n, i, j, m, reject_os, reject_tmax, badseq, class; double p, sum; int len = global_len, up = 0, stillbadseq = 0, reject_os_tmp = 0; sq = ghmm_cseq_calloc (seq_number); if (!sq) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } /* A specific length of the sequences isn't given. As a model should have an end state, the konstant MAX_SEQ_LEN is used. */ if (len <= 0) len = (int) GHMM_MAX_SEQ_LEN; /* Maximum length of a sequence not given */ if (Tmax <= 0) Tmax = (int) GHMM_MAX_SEQ_LEN; /* rng is also used by ighmm_rand_std_normal seed == -1: Initialization, has already been done outside the function */ if (seed >= 0) { ghmm_rng_init (); if (seed > 0) GHMM_RNG_SET (RNG, seed); else /* Random initialization! */ ghmm_rng_timeseed (RNG); } n = 0; reject_os = reject_tmax = 0; while (n < seq_number) { /* Test: A new seed for each sequence */ /* ghmm_rng_timeseed(RNG); */ stillbadseq = badseq = 0; ARRAY_CALLOC (sq->seq[n], len); /* Get a random initial state i */ p = GHMM_RNG_UNIFORM (RNG); sum = 0.0; for (i = 0; i < smo->N; i++) { sum += smo->s[i].pi; if (sum >= p) break; } if (i == smo->N) { /* Can happen by a rounding error in the input */ i--; while (i > 0 && smo->s[i].pi == 0.0) i--; } /* Get a random initial output -> get a random m and then respectively a pdf omega. */ p = GHMM_RNG_UNIFORM (RNG); sum = 0.0; for (m = 0; m < smo->s[i].M; m++) { sum += smo->s[i].c[m]; if (sum >= p) break; } if (m == smo->s[i].M) m--; /* Get random numbers according to the density function */ sq->seq[n][0] = ghmm_cmodel_get_random_var (smo, i, m); pos = 1; /* The first symbol chooses the start class */ if (smo->cos == 1) { class = 0; } else { /*printf("1: cos = %d, k = %d, t = %d\n",smo->cos,n,state);*/ if (!smo->class_change->get_class) { printf ("ERROR: get_class not initialized\n"); return (NULL); } class = smo->class_change->get_class (smo, sq->seq[n], n, 0); if (class >= smo->cos){ printf("ERROR: get_class returned index %d but model has only %d classes !\n",class,smo->cos); goto STOP; } } while (pos < len) { /* Get a new state */ p = GHMM_RNG_UNIFORM (RNG); sum = 0.0; for (j = 0; j < smo->s[i].out_states; j++) { sum += smo->s[i].out_a[class][j]; if (sum >= p) break; } if (j == smo->s[i].out_states) { /* Can happen by a rounding error */ j--; while (j > 0 && smo->s[i].out_a[class][j] == 0.0) j--; } if (sum == 0.0) { if (smo->s[i].out_states > 0) { /* Repudiate the sequence, if all smo->s[i].out_a[class][.] == 0, that is, class "class" isn't used in the original data: go out of the while-loop, n should not be counted. */ /* printf("Zustand %d, class %d, len %d out_states %d \n", i, class, state, smo->s[i].out_states); */ badseq = 1; /* break; */ /* Try: If the class is "empty", try the neighbour class; first, sweep down to zero; if still no success, sweep up to COS - 1. If still no success --> Repudiate the sequence. */ if (class > 0 && up == 0) { class--; continue; } else if (class < smo->cos - 1) { class++; up = 1; continue; } else { stillbadseq = 1; break; } } else { /* Final state reached, out of while-loop */ break; } } i = smo->s[i].out_id[j]; /* fprintf(stderr, "%d\n", i); */ /* fprintf(stderr, "%d\n", i); */ /* Get output from state i */ p = GHMM_RNG_UNIFORM (RNG); sum = 0.0; for (m = 0; m < smo->s[i].M; m++) { sum += smo->s[i].c[m]; if (sum >= p) break; } if (m == smo->s[i].M) { m--; while (m > 0 && smo->s[i].c[m] == 0.0) m--; } /* Get a random number from the corresponding density function */ sq->seq[n][pos] = ghmm_cmodel_get_random_var (smo, i, m); /* Decide the class for the next step */ if (smo->cos == 1) { class = 0; } else { /*printf("2: cos = %d, k = %d, t = %d\n",smo->cos,n,state);*/ if (!smo->class_change->get_class) { printf ("ERROR: get_class not initialized\n"); return (NULL); } class = smo->class_change->get_class (smo, sq->seq[n], n, pos); printf ("class = %d\n", class); if (class >= smo->cos){ printf("ERROR: get_class returned index %d but model has only %d classes !\n",class,smo->cos); goto STOP; } } up = 0; pos++; } /* while (state < len) */ if (badseq) { reject_os_tmp++; } if (stillbadseq) { reject_os++; m_free (sq->seq[n]); /* printf("cl %d, s %d, %d\n", class, i, n); */ } else if (pos > Tmax) { reject_tmax++; m_free (sq->seq[n]); } else { if (pos < len) ARRAY_REALLOC (sq->seq[n], pos); sq->seq_len[n] = pos;#ifdef GHMM_OBSOLETE sq->seq_label[n] = label;#endif /* GHMM_OBSOLETE */ /* ighmm_cvector_print(stdout, sq->seq[n], sq->seq_len[n]," "," ",""); */ n++; } /* printf("reject_os %d, reject_tmax %d\n", reject_os, reject_tmax); */ if (reject_os > 10000) { GHMM_LOG(LCONVERTED, "Reached max. no. of rejections\n"); break; } if (!(n % 1000)) printf ("%d Seqs. generated\n", n); } /* n-loop */ if (reject_os > 0) printf ("%d sequences rejected (os)!\n", reject_os); if (reject_os_tmp > 0) printf ("%d sequences changed class\n", reject_os_tmp - reject_os); if (reject_tmax > 0) printf ("%d sequences rejected (Tmax, %d)!\n", reject_tmax, Tmax); /* printf ("End of smodel_generate_sequences.\n"); */ return (sq);STOP: /* Label STOP from ARRAY_[CM]ALLOC */ ghmm_cseq_free (&sq); return (NULL);# undef CUR_PROC} /* ghmm_cmodel_generate_sequences *//*============================================================================*/int ghmm_cmodel_likelihood (ghmm_cmodel * smo, ghmm_cseq * sqd, double *log_p){# define CUR_PROC "ghmm_cmodel_likelihood" int res = -1; double log_p_i; int matched, i; matched = 0; *log_p = 0.0; for (i = 0; i < sqd->seq_number; i++) { /* setting sequence number in class_change struct if necessary */ if (smo->cos > 1) { if (!smo->class_change) { printf ("cos = %d but class_change not initialized !\n", smo->cos); goto STOP; } smo->class_change->k = i; } if (ghmm_cmodel_logp (smo, sqd->seq[i], sqd->seq_len[i], &log_p_i) != -1) { *log_p += log_p_i * sqd->seq_w[i]; matched++; } else { /* Test: high costs for each unmatched Seq. */ *log_p += GHMM_PENALTY_LOGP * sqd->seq_w[i]; matched++; ighmm_mes (MES_WIN, "sequence[%d] can't be build.\n", i); } } if (!matched) { GHMM_LOG(LCONVERTED, "NO sequence can be build.\n"); goto STOP; } /* return number of "matched" sequences */ res = matched; /* resetting sequence number to default value */ if (smo->cos > 1) { smo->class_change->k = -1; }STOP: /* Label STOP from ARRAY_[CM]ALLOC */ return (res);# undef CUR_PROC} /* ghmm_cmodel_likelihood */int ghmm_cmodel_individual_likelihoods (ghmm_cmodel * smo, ghmm_cseq * sqd, double *log_ps){ int matched, res; double log_p_i; int i; res = -1; matched = 0; for (i = 0; i < sqd->seq_number; i++) { /* setting sequence number in class_change struct if necessary */ if (smo->cos > 1) { if (!smo->class_change) { printf ("cos = %d but class_change not initialized !\n", smo->cos); goto STOP; } smo->class_change->k = i; } if (ghmm_cmodel_logp (smo, sqd->seq[i], sqd->seq_len[i], &log_p_i) != -1) { log_ps[i] = log_p_i; matched++; } else { /* Test: very small log score for sequence cannot be produced. */ log_ps[i] = -DBL_MAX; /* fprintf(stderr,"sequence[%d] cannot be build.\n", i); */ } } res = matched; if (matched == 0) { fprintf (stderr, "smodel_likelihood: NO sequence can be build.\n"); } /* resetting sequence number to default value */ if (smo->cos > 1) { smo->class_change->k = -1; } /* return number of "matched" sequences */ return res;STOP: /* Label STOP from ARRAY_[CM]ALLOC */ return -1;}/*============================================================================*//* various print functions *//*============================================================================*//*============================================================================*/void ghmm_cmodel_Ak_print (FILE * file, ghmm_cmodel * smo, int k, char *tab, char *separator, char *ending){ int i, j, out_state; for (i = 0; i < smo->N; i++) { out_state = 0; fprintf (file, "%s", tab); if (smo->s[i].out_states > 0 && smo->s[i].out_id[out_state] == 0) { fprintf (file, "%.4f", smo->s[i].out_a[k][out_state]); out_state++; } else fprintf (file, "0.0 "); for (j = 1; j < smo->N; j++) { if (smo->s[i].out_states > out_state && smo->s[i].out_id[out_state] == j) { fprintf (file, "%s %.4f", separator, smo->s[i].out_a[k][out_state]); out_state++; } else fprintf (file, "%s 0.0 ", separator); } fprintf (file, "%s\n", ending); }} /* ghmm_cmodel_Ak_print *//*============================================================================*/void ghmm_cmodel_C_print (FILE * file, ghmm_cmodel * smo, char *tab, char *separator, char *ending){ int i, j; for (i = 0; i < smo->N; i++) { fprintf (file, "%s", tab); fprintf (file, "%.4f", smo->s[i].c[0]); for (j = 1; j < smo->s[i].M; j++) fprintf (file, "%s %.4f", separator, smo->s[i].c[j]); fprintf (file, "%s\n", ending); }} /* ghmm_cmodel_C_print *//*============================================================================*/void ghmm_cmodel_Mue_print (FILE * file, ghmm_cmodel * smo, char *tab, char *separator, char *ending){ int i, j; for (i = 0; i < smo->N; i++) { fprintf (file, "%s", tab); fprintf (file, "%.4f", smo->s[i].mue[0]); for (j = 1; j < smo->s[i].M; j++) fprintf (file, "%s %.4f", separator, smo->s[i].mue[j]); fprintf (file, "%s\n", ending); }} /* ghmm_cmodel_Mue_print *//*============================================================================*/void ghmm_cmodel_U_print (FILE * file, ghmm_cmodel * smo, char *tab, char *separator, char *ending){ /* attention: choose precision big enough to allow printing of EPS_U in const.h */ int i, j; for (i = 0; i < smo->N; i++) { fprintf (file, "%s", tab); fprintf (file, "%.4f", smo->s[i].u[0]); for (j = 1; j < smo->s[i].M; j++) fprintf (file, "%s %.4f", separator, smo->s[i].u[j]); fprintf (file, "%s\n", ending); }} /* ghmm_cmodel_U_print *//*============================================================================*/void ghmm_cmodel_Pi_print (FILE * file, ghmm_cmodel * smo, char *tab, char *separator, char *ending){ int i; fprintf (file, "%s%.4f", tab, smo->s[0].pi); for (i = 1; i < smo->N; i++) fprintf (file, "%s %.4f", separator, smo->s[i].pi); fprintf (file, "%s\n", ending);} /* ghmm_cmodel_Pi_print *//*============================================================================*/void ghmm_cmodel_fix_print (FILE * file, ghmm_cmodel * smo, char *tab, char *separator, char *ending){ int i; fprintf (file, "%s%d", tab, smo->s[0].fix); for (i = 1; i < smo->N; i++) fprintf (file, "%s %d", separator, smo->s[i].fix); fprintf (file, "%s\n", ending);}/*============================================================================*/void ghmm_cmodel_print (FILE * file, ghmm_cmodel * smo){/* old function - No support to heterogeneous densities */ int k; fprintf (file, "SHMM = {\n\tM = %d;\n\tN = %d;\n\tdensity = %d;\n\tcos = %d;\n", smo->M, smo->N, (int) smo->s[0].density[0], smo->cos); /* smo files support only models with a single density */ fprintf (file, "\tprior = %.5f;\n", smo->prior); fprintf (file, "\tPi = vector {\n"); ghmm_cmodel_Pi_print (file, smo, "\t", ",", ";"); fprintf (file, "\t};\n"); fprintf (file, "\tfix_state = vector {\n"); ghmm_cmodel_fix_print (file, smo, "\t", ",", ";"); fprintf (file, "\t};\n"); for (k = 0; k < smo->cos; k++) { fprintf (file, "\tAk_%d = matrix {\n", k); ghmm_cmodel_Ak_print (file, smo, k, "\t", ",", ";"); fprintf (file, "\t};\n"); } fprintf (file, "\tC = matrix {\n"); ghmm_cmodel_C_print (file, smo, "\t", ",", ";"); fprintf (file, "\t};\n\tMue = matrix {\n"); ghmm_cmodel_Mue_print (file, smo, "\t", ",", ";"); fprintf (file, "\t};\n\tU = matrix {\n"); ghmm_cmodel_U_print (file, smo, "\t", ",", ";"); fprintf (file, "\t};\n"); fprintf (file, "};\n\n");} /* ghmm_cmodel_print *//*============================================================================*//* needed for hmm_input: only one A (=Ak_1 = Ak_2...) is written */void ghmm_cmodel_print_oneA (FILE * file, ghmm_cmodel * smo){/* old function - No support to heterogeneous densities */ fprintf (file, "SHMM = {\n\tM = %d;\n\tN = %d;\n\tdensity = %d;\n\tcos = %d;\n",
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?