📄 smodel.c
字号:
matrix_d_free(&((*smo)->s[i].out_a), (*smo)->cos); matrix_d_free(&((*smo)->s[i].in_a), (*smo)->cos); m_free((*smo)->s[i].c); m_free((*smo)->s[i].mue); m_free((*smo)->s[i].u); } m_free((*smo)->s); m_free(*smo); return(0);#undef CUR_PROC} /* smodel_free */ /*============================================================================*/smodel *smodel_copy(const smodel *smo) {# define CUR_PROC "smodel_copy" int i, k, j, nachf, vorg, m; smodel *sm2 = NULL; if(!m_calloc(sm2, 1)) {mes_proc(); goto STOP;} if (!m_calloc(sm2->s, smo->N)) {mes_proc(); goto STOP;} for (i = 0; i < smo->N; i++) { nachf = smo->s[i].out_states; vorg = smo->s[i].in_states; if(!m_calloc(sm2->s[i].out_id, nachf)) {mes_proc(); goto STOP;} sm2->s[i].out_a = matrix_d_alloc(smo->cos, nachf); if(!sm2->s[i].out_a) {mes_proc(); goto STOP;} if(!m_calloc(sm2->s[i].in_id, vorg)) {mes_proc(); goto STOP;} sm2->s[i].in_a = matrix_d_alloc(smo->cos, vorg); if(!sm2->s[i].in_a) {mes_proc(); goto STOP;} if(!m_calloc(sm2->s[i].c, smo->M)) {mes_proc(); goto STOP;} if(!m_calloc(sm2->s[i].mue, smo->M)) {mes_proc(); goto STOP;} if(!m_calloc(sm2->s[i].u, smo->M)) {mes_proc(); goto STOP;} /* copy values */ for (j = 0; j < nachf; j++) { for (k = 0; k < smo->cos; k++) sm2->s[i].out_a[k][j] = smo->s[i].out_a[k][j]; sm2->s[i].out_id[j] = smo->s[i].out_id[j]; } for (j = 0; j < vorg; j++) { for (k = 0; k < smo->cos; k++) sm2->s[i].in_a[k][j] = smo->s[i].in_a[k][j]; sm2->s[i].in_id[j] = smo->s[i].in_id[j]; } for (m = 0; m < smo->M; m++) { sm2->s[i].c[m] = smo->s[i].c[m]; sm2->s[i].mue[m] = smo->s[i].mue[m]; sm2->s[i].u[m] = smo->s[i].u[m]; } sm2->s[i].pi = smo->s[i].pi; sm2->s[i].fix = smo->s[i].fix; sm2->s[i].out_states = nachf; sm2->s[i].in_states = vorg; } sm2->cos = smo->cos; sm2->N = smo->N; sm2->M = smo->M; sm2->density = smo->density; sm2->prior = smo->prior; return(sm2);STOP: smodel_free(&sm2); return(NULL);# undef CUR_PROC} /* smodel_copy *//*============================================================================*/int smodel_check(const smodel* smo) {# define CUR_PROC "smodel_check" int res = -1; double sum; int i,k,j; /* sum Pi[i] == 1 ? */ sum = 0.0; for (i = 0; i < smo->N; i++) { sum += smo->s[i].pi; } if ( fabs(sum - 1.0) >= EPS_PREC ) { mes_prot("sum Pi[i] != 1.0\n"); goto STOP; } /* only 0/1 in fix? */ for (i = 0; i < smo->N; i++) { if (smo->s[i].fix != 0 && smo->s[i].fix != 1) { mes_prot("in vector fix_state only 0/1 values!\n"); goto STOP; } } for (i = 0; i < smo->N; i++) { if (smo->s[i].out_states == 0) { char *str = mprintf(NULL,0,"out_states = 0 (state %d -> final state!)\n",i); mes_prot(str); } /* sum a[i][k][j] */ for (k = 0; k < smo->cos; k++) { sum = 0.0; for (j = 0; j < smo->s[i].out_states; j++) { sum += smo->s[i].out_a[k][j]; } if ( fabs(sum - 1.0) >= EPS_PREC ) { char *str = mprintf(NULL, 0, "sum out_a[j] = %.2f != 1.0 (state %d)\n", sum, i); mes_prot(str); m_free(str); /* goto STOP; */ } } /* sum c[j] */ sum = 0.0; for (j = 0; j < smo->M; j++) sum += smo->s[i].c[j]; if ( fabs(sum - 1.0) >= EPS_PREC ) { char *str = mprintf(NULL, 0, "sum c[j] = %.2f != 1.0 (state %d)\n",sum,i); mes_prot(str); m_free(str); goto STOP; } /* check mue, u ? */ } res = 0;STOP: return(res);# undef CUR_PROC} /* smodel_check *//*============================================================================*/int smodel_check_compatibility(smodel **smo, int smodel_number) {#define CUR_PROC "smodel_check_compatibility" int i, j; for (i = 0; i < smodel_number; i++) for (j = i+1; j < smodel_number; j++) { if (smo[i]->N != smo[j]->N) { char *str = mprintf(NULL, 0, "ERROR: different number of states in smodel %d (%d) and smodel %d (%d)", i, smo[i]->N, j, smo[j]->N); mes_prot(str); m_free(str); return (-1); } if (smo[i]->M != smo[j]->M) { char *str = mprintf(NULL, 0, "ERROR: different number of possible outputs in smodel %d (%d) and smodel %d (%d)", i, smo[i]->M, j, smo[j]->M); mes_prot(str); m_free(str); return (-1); } } return 0;#undef CUR_PROC } /* smodel_check_compatibility *//*============================================================================*/double smodel_get_random_var(smodel *smo, int state, int m) {# define CUR_PROC "smodel_get_random_var" switch (smo->density) { case normal_approx: case normal: return(randvar_normal(smo->s[state].mue[m], smo->s[state].u[m], 0)); case normal_pos: return(randvar_normal_pos(smo->s[state].mue[m],smo->s[state].u[m],0)); default: mes(MES_WIN, "Warning: density function not specified!\n"); return(-1); }# undef CUR_PROC} /* smodel_get_random_var *//*============================================================================*/sequence_d_t *smodel_generate_sequences(smodel* smo, int seed, int global_len, long seq_number, long label, int Tmax) {# define CUR_PROC "smodel_generate_sequences" /* An end state is characterized by not having an output probabiliy. */ sequence_d_t *sq = NULL; int state, n, i, j, m, reject_os, reject_tmax, badseq, class; double p, sum, osum = 0.0; int len = global_len, up = 0, stillbadseq = 0, reject_os_tmp = 0; sq = sequence_d_calloc(seq_number); if (!sq) { mes_proc(); goto STOP; } /* A specific length of the sequences isn't given. As a model should have an end state, the konstant MAX_SEQ_LEN is used. */ if (len <= 0) len = (int)MAX_SEQ_LEN; /* Maximum length of a sequence not given */ if (Tmax <= 0) Tmax = (int)MAX_SEQ_LEN; /* gsl is also used by randvar_std_normal seed == -1: Initialization, has already been done outside the function */ if (seed >= 0) { gsl_rng_init(); if (seed > 0) gsl_rng_set(RNG,seed); else /* Random initialization! */ gsl_rng_timeseed(RNG); } n = 0; reject_os = reject_tmax = 0; while (n < seq_number) { /* Test: A new seed for each sequence */ /* gsl_rng_timeseed(RNG); */ stillbadseq = badseq = 0; if(!m_calloc(sq->seq[n], len)) {mes_proc(); goto STOP;} /* Get a random initial state i */ p = gsl_rng_uniform(RNG); sum = 0.0; for (i = 0; i < smo->N; i++) { sum += smo->s[i].pi; if (sum >= p) break; } if (i == smo->N) { /* Can happen by a rounding error in the input */ i--; while (i > 0 && smo->s[i].pi == 0.0) i--; } /* Get a random initial output -> get a random m and then respectively a pdf omega. */ p = gsl_rng_uniform(RNG); sum = 0.0; for (m = 0; m < smo->M; m++) { sum += smo->s[i].c[m]; if (sum >= p) break; } if (m == smo->M) m--; /* Get random numbers according to the density function */ sq->seq[n][0] = smodel_get_random_var(smo, i, m); state = 1; /* The first symbol chooses the start class */ class = sequence_d_class(sq->seq[n], 0, &osum); while (state < len) { /* Get a new state */ p = gsl_rng_uniform(RNG); sum = 0.0; for (j = 0; j < smo->s[i].out_states; j++) { sum += smo->s[i].out_a[class][j]; if (sum >= p) break; } if (j == smo->s[i].out_states) {/* Can happen by a rounding error */ j--; while (j > 0 && smo->s[i].out_a[class][j] == 0.0) j--; } if (sum == 0.0) { if (smo->s[i].out_states > 0) { /* Repudiate the sequence, if all smo->s[i].out_a[class][.] == 0, that is, class "class" isn't used in the original data: go out of the while-loop, n should not be counted. */ /* printf("Zustand %d, class %d, len %d out_states %d \n", i, class, state, smo->s[i].out_states); */ badseq = 1; /* break; */ /* Try: If the class is "empty", try the neighbour class; first, sweep down to zero; if still no success, sweep up to COS - 1. If still no success --> Repudiate the sequence. */ if (class > 0 && up == 0) { class--; continue; } else if (class < smo->cos - 1) { class++; up = 1; continue; } else { stillbadseq = 1; break; } } else /* Final state reached, out of while-loop */ break; } i = smo->s[i].out_id[j]; /* fprintf(stderr, "%d\n", i); */ /* fprintf(stderr, "%d\n", i); */ /* Get output from state i */ p = gsl_rng_uniform(RNG); sum = 0.0; for (m = 0; m < smo->M; m++) { sum += smo->s[i].c[m]; if (sum >= p) break; } if (m == smo->M) { m--; while (m > 0 && smo->s[i].c[m] == 0.0) m--; } /* Get a random number from the corresponding density function */ sq->seq[n][state] = smodel_get_random_var(smo, i, m); /* Decide the class for the next step */ class = sequence_d_class(sq->seq[n], state, &osum); up = 0; state++; } /* while (state < len) */ if (badseq) { reject_os_tmp++; } if (stillbadseq) { reject_os++; m_free(sq->seq[n]); /* printf("cl %d, s %d, %d\n", class, i, n); */ } else if (state > Tmax) { reject_tmax++; m_free(sq->seq[n]); } else { if (state < len) if(m_realloc(sq->seq[n], state)) {mes_proc(); goto STOP;} sq->seq_len[n] = state; sq->seq_label[n] = label; /* vector_d_print(stdout, sq->seq[n], sq->seq_len[n]," "," ",""); */ n++; } /* printf("reject_os %d, reject_tmax %d\n", reject_os, reject_tmax); */ if (reject_os > 10000) { mes_prot("Reached max. no. of rejections\n"); break; } if (!(n%1000)) printf("%d Seqs. generated\n", n); } /* n-loop */ if (reject_os > 0) printf("%d sequences rejected (os)!\n", reject_os); if (reject_os_tmp > 0) printf("%d sequences changed class\n", reject_os_tmp - reject_os); if (reject_tmax > 0) printf("%d sequences rejected (Tmax, %d)!\n", reject_tmax, Tmax); return(sq);STOP: sequence_d_free(&sq); return(NULL);# undef CUR_PROC} /* smodel_generate_sequences *//*============================================================================*/int smodel_likelihood(smodel *smo, sequence_d_t *sqd, double *log_p) {# define CUR_PROC "smodel_likelihood" int res = -1; double log_p_i; int matched, i; matched = 0; *log_p = 0.0; for (i = 0; i < sqd->seq_number; i++) { if (sfoba_logp(smo, sqd->seq[i], sqd->seq_len[i], &log_p_i) != -1) { *log_p += log_p_i * sqd->seq_w[i]; matched++; } else { /* Test: high costs for each unmatched Seq. */ *log_p += PENALTY_LOGP * sqd->seq_w[i]; matched++; mes(MES_WIN, "sequence[%d] can't be build.\n", i); } } if (!matched) { mes_prot("NO sequence can be build.\n"); goto STOP; } /* return number of "matched" sequences */ res = matched;STOP: return(res);# undef CUR_PROC} /* smodel_likelihood */int smodel_individual_likelihoods(smodel *smo, sequence_d_t *sqd, double *log_ps) { int matched, res; double log_p_i; int i; res=-1; matched=0; for ( i = 0; i < sqd->seq_number; i++) { if (sfoba_logp(smo, sqd->seq[i], sqd->seq_len[i], &log_p_i) != -1) { log_ps[i] = log_p_i; matched++; } else { /* Test: very small log score for sequence cannot be produced. */ log_ps[i] = -DBL_MAX; /* fprintf(stderr,"sequence[%d] cannot be build.\n", i); */ } } res=matched; if (matched == 0) { fprintf(stderr, "smodel_likelihood: NO sequence can be build.\n"); } /* return number of "matched" sequences */ return res;}/*============================================================================*//* various print functions *//*============================================================================*//*============================================================================*/void smodel_Ak_print(FILE *file, smodel *smo, int k, char *tab, char *separator, char *ending) { int i, j, out_state; for (i = 0; i < smo->N; i++) { out_state = 0; fprintf(file, "%s", tab); if (smo->s[i].out_states > 0 && smo->s[i].out_id[out_state] == 0) { fprintf(file, "%.4f", smo->s[i].out_a[k][out_state]); out_state++; } else fprintf(file, "0.0 "); for (j = 1; j < smo->N; j++) { if (smo->s[i].out_states > out_state && smo->s[i].out_id[out_state] == j) { fprintf(file, "%s %.4f", separator, smo->s[i].out_a[k][out_state]); out_state++; } else fprintf(file, "%s 0.0 ", separator); } fprintf(file, "%s\n", ending); }} /* smodel_Ak_print *//*============================================================================*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -