📄 model.c
字号:
for (j = 0; j < mo->s[i].out_states; j++) { sum += mo->s[i].out_a[j]; /* printf(" out_a[%d][%d] = %8.5f\n", i,j, mo->s[i].out_a[j]); */ } if ( fabs(sum - 1.0) >= EPS_PREC ) { char *str = mprintf(NULL, 0, "sum out_a[j] = %.2f != 1.0 (state %d)\n", sum, i); mes_prot(str); m_free(str); /* goto STOP; */ } /* Sum the b[j]'s: normalized emission probs */ sum = 0.0; for (j = 0; j < mo->M; j++) sum += mo->s[i].b[j]; if ( fabs(sum - 1.0) >= EPS_PREC ) { res = -2; /* So we can ignore a silent state */ str = mprintf(NULL, 0, "sum b[j] = %.2f != 1.0 (state %d)\n", sum, i); mes_prot(str); m_free(str); goto STOP; } /* i over all states */ } res = 0;STOP: return(res);# undef CUR_PROC} /* model_check *//*============================================================================*/int model_check_compatibility(model **mo, int model_number) {#define CUR_PROC "model_check_compatibility" int i, j; for (i = 0; i < model_number; i++) for (j = i+1; j < model_number; j++) { if (mo[i]->N != mo[j]->N) { char *str = mprintf(NULL, 0, "ERROR: different number of states in model %d (%d) and model %d (%d)", i, mo[i]->N, j, mo[j]->N); mes_prot(str); m_free(str); return (-1); } if (mo[i]->M != mo[j]->M) { char *str = mprintf(NULL, 0, "ERROR: different number of possible outputs in model %d (%d) and model %d (%d)", i, mo[i]->M, j, mo[j]->M); mes_prot(str); m_free(str); return (-1); } } return 0;#undef CUR_PROC } /* model_check_compatibility *//*============================================================================*/model *model_generate_from_sequence(const int *seq, int seq_len, int anz_symb) {#define CUR_PROC "model_generate_from_sequence" int i; model *mo = NULL; state *s = NULL; if(!m_calloc(mo, 1)) {mes_proc(); goto STOP;} mo->N = seq_len; mo->M = anz_symb; /* Allocate memory for all vectors */ if (!m_calloc(mo->s, mo->N)) {mes_proc(); goto STOP;} for (i = 0; i < mo->N; i++) { if (i == 0) { /* Initial state */ if (model_state_alloc(mo->s, mo->M, 0, 1)) { mes_proc(); goto STOP; } } else if (i == mo->N - 1) { /* End state */ if (model_state_alloc(mo->s + i, mo->M, 1, 0)) { mes_proc(); goto STOP; } } else { /* others */ if (model_state_alloc(mo->s + i, mo->M, 1, 1)) { mes_proc(); goto STOP; } } } /* Allocate states with the right values, the initial state and the end state extra */ for (i = 1; i < mo->N - 1; i++) { s = mo->s + i; s->pi = 0.0; s->out_states = 1; s->in_states = 1; s->b[seq[i]] = 1.0; /* others stay 0 */ *(s->out_id) = i + 1; *(s->in_id) = i - 1; *(s->out_a) = *(s->in_a) = 1.0; } /* Initial state */ s = mo->s; s->pi = 1.0; s->out_states = 1; s->in_states = 0; s->b[seq[0]] = 1.0; *(s->out_id) = 1; *(s->out_a) = 1.0; /* No in_id and in_a */ /* End state */ s = mo->s + mo->N - 1; s->pi = 0.0; s->out_states = 0; s->in_states = 1; s->b[seq[mo->N-1]] = 1.0; /* All other b's stay zero */ *(s->in_id) = mo->N - 2; *(s->in_a) = 1.0; /* No out_id and out_a */ if (model_check(mo)) { mes_proc(); goto STOP; } return mo;STOP: model_free(&mo); return NULL;#undef CUR_PROC} /* model_generate_from_sequence *//*============================================================================*/sequence_t *model_generate_sequences(model* mo, int seed, int global_len, long seq_number, int Tmax) {# define CUR_PROC "model_generate_sequences" /* An end state is characterized by not having an output probabiliy. */ sequence_t *sq = NULL; int i, j, m,temp; double p, sum; int len = global_len; //int silent_len = 0; int n=0; int incomplete_seq = 0; int state=0; sq = sequence_calloc(seq_number); if (!sq) { mes_proc(); goto STOP; } if (len <= 0) /* A specific length of the sequences isn't given. As a model should have an end state, the konstant MAX_SEQ_LEN is used. */ len = (int)MAX_SEQ_LEN; if (seed > 0) { gsl_rng_set(RNG,seed); } while (n < seq_number) { //printf("sequenz n = %d\n",n); //printf("incomplete_seq: %d\n",incomplete_seq); if (incomplete_seq == 0) { if(!m_calloc(sq->seq[n], len)) {mes_proc(); goto STOP;} state = 0; } /* Get a random initial state i */ p = gsl_rng_uniform(RNG); sum = 0.0; for (i = 0; i < mo->N; i++) { sum += mo->s[i].pi; if (sum >= p) break; } if ( mo->model_type == kSilentStates ){ if (!mo->silent[i]) { /* first state emits */ //printf("first state %d not silent\n",i); /* Get a random initial output m */ p = gsl_rng_uniform(RNG); sum = 0.0; for (m = 0; m < mo->M; m++) { sum += mo->s[i].b[m]; if (sum >= p) break; } sq->seq[n][state] = m; state = state+1; } else { /* silent state: we do nothing, no output */ //printf("first state %d silent\n",i); //state = 0; //silent_len = silent_len + 1; } } else { /* Get a random initial output m */ p = gsl_rng_uniform(RNG); sum = 0.0; for (m = 0; m < mo->M; m++) { sum += mo->s[i].b[m]; if (sum >= p) break; } sq->seq[n][state] = m; state = state + 1; } /* check whether sequence was completed by inital state*/ if (state >= len && incomplete_seq == 1){ //printf("assinging length %d to sequence %d\n",state,n); //printf("sequence complete...\n"); sq->seq_len[n] = state; incomplete_seq = 0; n++; continue; } while (state < len) { /* Get a random state i */ p = gsl_rng_uniform(RNG); sum = 0.0; for (j = 0; j < mo->s[i].out_states; j++) { sum += mo->s[i].out_a[j]; if (sum >= p) break; } //printf("state %d selected (i: %d, j: %d) at position %d\n",mo->s[i].out_id[j],i,j,state); if (sum == 0.0) { if (state < len) { incomplete_seq = 1; //printf("final state reached - aborting\n"); break; } else { n++; break; } } i = mo->s[i].out_id[j]; if (mo->model_type == kSilentStates && mo->silent[i]) { /* Get a silent state i */ //printf("silent state \n"); //silent_len += 1; /*if (silent_len >= Tmax) { printf("%d silent states reached -> silent circle - aborting...\n",silent_len); incomplete_seq = 0; sq->seq_len[n] = state; n++; break; }*/ } else { /* Get a random output m from state i */ p = gsl_rng_uniform(RNG); sum = 0.0; for (m = 0; m < mo->M; m++) { sum += mo->s[i].b[m]; if (sum >= p) break; } sq->seq[n][state] = m; state++; } if (state == len ){ incomplete_seq = 0; sq->seq_len[n] = state; n++; } } /* while (state < len) */ } /* while( n < seq_number )*/return(sq);STOP: sequence_free(&sq); return(NULL);# undef CUR_PROC} /* data */ /*============================================================================*/double model_likelihood(model *mo, sequence_t *sq) {# define CUR_PROC "model_likelihood" double log_p_i, log_p; int found, i,j; //printf("*** model_likelihood:\n"); found = 0; log_p = 0.0; for (i = 0; i < sq->seq_number; i++) { //printf("sequence:\n"); //for (j=0;j < sq->seq_len[i];j++) { // printf("%d, ",sq->seq[i][j]); //} //printf("\n"); if (foba_logp(mo, sq->seq[i], sq->seq_len[i], &log_p_i) == -1) { mes_proc(); goto STOP; } //printf("\nlog_p_i = %f\n", log_p_i); if (log_p_i != +1) { log_p += log_p_i; found = 1; } else { char *str = mprintf(NULL, 0, "sequence[%d] can't be build.\n", i); mes_prot(str); } } if (!found) log_p = +1.0; return(log_p);STOP: return -1;# undef CUR_PROC} /* model_likelihood */void model_set_transition(model *mo, int i, int j, double prob) { # define CUR_PROC "model_set_transition" int in, out; if (mo->s && mo->s[i].out_a && mo->s[j].in_a) { for(out=0; out < mo->s[i].out_states; out++) { if ( mo->s[i].out_id[out] == j ) { mo->s[i].out_a[out] = prob; fprintf(stderr, "model_set_transition(0):State %d, %d, = %f\n", i, j, prob); break; } } for(in=0; in < mo->s[j].in_states; in++) { if ( mo->s[j].in_id[in] == i ) { mo->s[j].in_a[in] = prob; break; } } } # undef CUR_PROC}/* model_set_transition *//*============================================================================*//* Some outputs *//*============================================================================*/void model_states_print(FILE *file, model *mo) { int i, j; fprintf(file, "Modelparameters: \n M = %d \t N = %d\n", mo->M, mo->N); for (i = 0; i < mo->N; i++) { fprintf(file, "\nState %d \n PI = %.3f \n out_states = %d \n in_states = %d \n", i, mo->s[i].pi, mo->s[i].out_states, mo->s[i].in_states); fprintf(file, " Output probability:\t"); for (j = 0; j < mo->M; j++) fprintf(file, "%.3f \t", mo->s[i].b[j]); fprintf(file, "\n Transition probability \n"); fprintf(file, " Out states (Id, a):\t"); for (j = 0; j < mo->s[i].out_states; j++) fprintf(file, "(%d, %.3f) \t", mo->s[i].out_id[j], mo->s[i].out_a[j]); fprintf(file, "\n"); fprintf(file, " In states (Id, a):\t"); for (j = 0; j < mo->s[i].in_states; j++) fprintf(file, "(%d, %.3f) \t", mo->s[i].in_id[j], mo->s[i].in_a[j]); fprintf(file, "\n"); }} /* model_states_print *//*============================================================================*/void model_A_print(FILE *file, model *mo, char *tab, char *separator, char *ending) { int i, j, out_state; for (i = 0; i < mo->N; i++) { out_state = 0; fprintf(file, "%s", tab); if (mo->s[i].out_states > 0 && mo->s[i].out_id[out_state] == 0) { fprintf(file, "%.2f", mo->s[i].out_a[out_state]); out_state++; } else fprintf(file, "0.00"); for (j = 1; j < mo->N; j++) { if (mo->s[i].out_states > out_state && mo->s[i].out_id[out_state] == j) { fprintf(file, "%s %.2f", separator, mo->s[i].out_a[out_state]); out_state++; } else fprintf(file, "%s 0.00", separator); } fprintf(file, "%s\n", ending); }} /* model_A_print *//*============================================================================*/void model_B_print(FILE *file, model *mo, char *tab, char *separator, char *ending) { int i, j; for (i = 0; i < mo->N; i++) { fprintf(file, "%s", tab); fprintf(file, "%.2f", mo->s[i].b[0]); for (j = 1; j < mo->M; j++) fprintf(file, "%s %.2f", separator, mo->s[i].b[j]); fprintf(file, "%s\n", ending); }} /* model_B_print *//*============================================================================*/void model_Pi_print(FILE *file, model *mo, char *tab, char *separator, char *ending) { int i; fprintf(file, "%s%.2f", tab, mo->s[0].pi); for (i = 1; i < mo->N; i++) fprintf(file, "%s %.2f", separator, mo->s[i].pi); fprintf(file, "%s\n", ending);} /* model_Pi_print */void model_fix_print(FILE *file, model *mo, char *tab, char *separator, char *ending) { int i; fprintf(file, "%s%d", tab, mo->s[0].fix); for (i = 1; i < mo->N; i++) fprintf(file, "%s %d", separator, mo->s[i].fix); fprintf(file, "%s\n", ending);} /* model_Pi_print *//*============================================================================*/void model_A_print_transp(FILE *file, model *mo, char *tab, char *separator, char *ending) {# define CUR_PROC "model_A_print_transp" int i, j; int *out_state; if(!m_calloc(out_state, mo->N)) {mes_proc(); goto STOP;} for (i = 0; i < mo->N; i++) out_state[i] = 0; for (j = 0; j < mo->N; j++) { fprintf(file, "%s", tab); if (mo->s[0].out_states != 0 && mo->s[0].out_id[out_state[0]] == j) { fprintf(file, "%.2f", mo->s[0].out_a[out_state[0]]); (out_state[0])++; } else fprintf(file, "0.00"); for (i = 1; i < mo->N; i++) { if (mo->s[i].out_states != 0 && mo->s[i].out_id[out_state[i]] == j) { fprintf(file, "%s %.2f", separator, mo->s[i].out_a[out_state[i]]); (out_state[i])++; } else fprintf(file, "%s 0.00", separator); } fprintf(file, "%s\n", ending); }STOP: m_free(out_state); return;# undef CUR_PROC} /* model_A_print_transp *//*============================================================================*/void model_B_print_transp(FILE *file, model *mo, char *tab, char *separator, char *ending) { int i, j; for (j = 0; j < mo->M; j++) { fprintf(file, "%s", tab); fprintf(file, "%.2f", mo->s[0].b[j]); for (i = 1; i < mo->N; i++) fprintf(file, "%s %.2f", separator, mo->s[i].b[j]); fprintf(file, "%s\n", ending); }} /* model_B_print_transp *//*============================================================================*/void model_Pi_print_transp(FILE *file, model *mo, char *tab, char *ending) { int i; for (i = 0; i < mo->N; i++) fprintf(file, "%s%.2f%s\n", tab, mo->s[i].pi, ending);} /* model_Pi_print_transp *//*============================================================================*/void model_print(FILE *file, model *mo) { fprintf(file, "HMM = {\n\tM = %d;\n\tN = %d;\n", mo->M, mo->N); fprintf(file, "\tprior = %.3f;\n", mo->prior); fprintf(file, "\tA = matrix {\n"); model_A_print(file, mo, "\t", ",", ";"); fprintf(file, "\t};\n\tB = matrix {\n"); model_B_print(file, mo,"\t", ",", ";"); fprintf(file, "\t};\n\tPi = vector {\n"); model_Pi_print(file, mo, "\t", ",", ";"); fprintf(file, "\t};\n\tfix_state = vector {\n"); model_fix_print(file, mo, "\t", ",", ";"); fprintf(file, "\t};\n};\n\n");} /* model_print *//*============================================================================*/void model_direct_print(FILE *file, model_direct *mo_d, int multip) { int i, j; for (i = 0; i < multip; i++) { fprintf(file, "HMM = {\n\tM = %d;\n\tN = %d;\n", mo_d->M, mo_d->N); fprintf(file, "\tprior = %.3f;\n", mo_d->prior); fprintf(file, "\tA = matrix {\n"); matrix_d_print(file, mo_d->A, mo_d->N, mo_d->N, "\t", ",", ";"); fprintf(file, "\t};\n\tB = matrix {\n"); matrix_d_print(file, mo_d->B, mo_d->N, mo_d->M, "\t", ",", ";"); fprintf(file, "\t};\n\tPi = vector {\n"); fprintf(file, "\t%.4f", mo_d->Pi[0]); for (j = 1; j < mo_d->N; j++) fprintf(file, ", %.4f", mo_d->Pi[j]); fprintf(file, ";\n\t};\n"); fprintf(file, "\tfix_state = vector {\n"); fprintf(file, "\t%d", mo_d->fix_state[0]); for (j = 1; j < mo_d->N; j++) fprintf(file, ", %d", mo_d->fix_state[j]); fprintf(file, ";\n\t};\n"); fprintf(file, "};\n\n"); }} /* model_direct_print *//*============================================================================*/void model_direct_clean(model_direct *mo_d, hmm_check_t *check) {#define CUR_PROC "model_direct_clean" int i;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -