📄 model.c
字号:
if (!mo_d) return; mo_d->M = mo_d->N = 0; mo_d->prior = -1; if (mo_d->A) { for (i = 0; i < check->r_a; i++) m_free(mo_d->A[i]); m_free(mo_d->A); } if (mo_d->B) { for (i = 0; i < check->r_b; i++) m_free(mo_d->B[i]); m_free(mo_d->B); } if (mo_d->Pi) m_free(mo_d->Pi); if (mo_d->fix_state) m_free(mo_d->fix_state); mo_d->A = mo_d->B = NULL; mo_d->Pi = NULL; mo_d->fix_state = NULL;#undef CUR_PROC} /* model_direct_clean *//*============================================================================*/int model_direct_check_data(model_direct *mo_d, hmm_check_t *check) {#define CUR_PROC "model_direct_check_data" char *str; if (check->r_a != mo_d->N || check->c_a != mo_d->N) { str = mprintf(NULL, 0, "Incompatible dim. A (%d X %d) and N (%d)\n", check->r_a, check->c_a, mo_d->N); mes_prot(str); m_free(str); return (-1); } if (check->r_b != mo_d->N || check->c_b != mo_d->M) { str = mprintf(NULL,0,"Incompatible dim. B (%d X %d) and N X M (%d X %d)\n", check->r_b, check->c_b, mo_d->N, mo_d->M); mes_prot(str); m_free(str); return (-1); } if (check->len_pi != mo_d->N) { str = mprintf(NULL, 0, "Incompatible dim. Pi (%d) and N (%d)\n", check->len_pi, mo_d->N); mes_prot(str); m_free(str); return (-1); } if (check->len_fix != mo_d->N) { str = mprintf(NULL, 0, "Incompatible dim. fix_state (%d) and N (%d)\n", check->len_fix, mo_d->N); mes_prot(str); m_free(str); return (-1); } return 0;#undef CUR_PROC} /* model_direct_check_data *//*============================================================================*//* XXX symmetric not implemented yet */double model_prob_distance(model *m0, model *m, int maxT, int symmetric, int verbose){#define CUR_PROC "model_prob_distance"#define STEPS 40 double p0, p; double d = 0.0; double *d1; sequence_t *seq0 = NULL; sequence_t *tmp = NULL; model *mo1, *mo2; int i, t, a, k; int true_len; int true_number; int left_to_right = 0; long total, index; int step_width = 0; int steps = 1; //printf("*** model_prob_distance:\n"); if (verbose) { /* If we are doing it verbosely we want to have 40 steps */ step_width = maxT / 40; steps = STEPS; } else /* else just one */ step_width = maxT; if( !m_calloc(d1, steps) ) {mes_proc();goto STOP;} mo1 = m0; mo2 = m; for (k = 0; k < 2; k++) { /* Two passes for the symmetric case */ /* seed = 0 -> no reseeding. Call gsl_rng_timeseed(RNG) externally */ seq0 = model_generate_sequences(mo1, 0, maxT+1, 1,maxT+1); if (seq0 == NULL) { mes_prot(" generate_sequences failed !");goto STOP;} if (seq0->seq_len[0] < maxT) { /* There is an absorbing state */ /* NOTA BENE: Assumpting the model delivers an explicit end state, the condition of a fix initial state is removed. */ /* For now check that Pi puts all weight on state */ /* t = 0; for (i = 0; i < mo1->N; i++) { if (mo1->s[i].pi > 0.001) t++; } if (t > 1) { mes_prot("ERROR: No proper left-to-right model. Multiple start states"); goto STOP; } */ left_to_right = 1; total = seq0->seq_len[0]; while (total <= maxT) { /* create a additional sequences at once */ a = (maxT - total) / (total / seq0->seq_number) + 1; /* printf("total=%d generating %d", total, a); */ tmp = model_generate_sequences(mo1, 0, 0, a,a); if (tmp == NULL) { mes_prot(" generate_sequences failed !");goto STOP;} sequence_free(&tmp); sequence_add(seq0,tmp); total = 0; for (i = 0; i < seq0->seq_number; i++) total += seq0->seq_len[i]; } } if (left_to_right) { for (t=step_width, i=0; t <= maxT; t+= step_width, i++) { index = 0; total = seq0->seq_len[0]; /* Determine how many sequences we need to get a total of t and adjust length of last sequence to obtain total of exactly t */ while(total < t) { index++; total += seq0->seq_len[index]; } true_len = seq0->seq_len[index]; true_number = seq0->seq_number; if ((total - t) > 0) seq0->seq_len[index] = total - t; seq0->seq_number = index; p0 = model_likelihood(mo1, seq0); if (p0 == +1 || p0 == -1) { /* error! */ mes_prot("problem: model_likelihood failed !"); goto STOP; } p = model_likelihood(mo2, seq0); if (p == +1 || p == -1) { /* what shall we do now? */ mes_prot("problem: model_likelihood failed !"); goto STOP; } d = 1.0 / t * (p0 - p); if (symmetric) { if (k == 0) /* save d */ d1[i] = d; else { /* calculate d */ d = 0.5 * (d1[i] + d); } } if (verbose && (!symmetric || k == 1)) printf("%d\t%f\t%f\t%f\n", t, p0, p, d); seq0->seq_len[index] = true_len; seq0->seq_number = true_number; } } else { true_len = seq0->seq_len[0]; for (t=step_width, i=0; t <= maxT; t+= step_width, i++) { seq0->seq_len[0] = t; p0 = model_likelihood(mo1, seq0); //printf(" P(O|m1) = %f\n",p0); if (p0 == +1) { /* error! */ mes_prot("seq0 can't be build from mo1!"); goto STOP; } p = model_likelihood(mo2, seq0); //printf(" P(O|m2) = %f\n",p); if (p == +1) { /* what shall we do now? */ mes_prot("problem: seq0 can't be build from mo2!"); goto STOP; } d = (1.0 / t) * (p0 - p); if (symmetric) { if (k == 0) /* save d */ d1[i] = d; else { /* calculate d */ d = 0.5 * (d1[i] + d); } } if (verbose && (!symmetric || k == 1)) printf("%d\t%f\t%f\t%f\n", t, p0, p, d); } seq0->seq_len[0] = true_len; } if (symmetric) { sequence_free(&seq0); mo1 = m ; mo2 = m0; } else break; } /* k = 1,2 */ sequence_free(&seq0); free(d1); return d;STOP: sequence_free(&seq0); free(d1); return(0.0);#undef CUR_PROC}/*============================================================================*/void state_clean(state *my_state) {#define CUR_PROC "state_clean" if (!my_state) return; if (my_state->b) m_free(my_state->b); if (my_state->out_id) m_free(my_state->out_id); if (my_state->in_id) m_free(my_state->in_id); if (my_state->out_a) m_free(my_state->out_a); if (my_state->in_a) m_free(my_state->in_a); my_state->pi = 0; my_state->b = NULL; my_state->out_id = NULL; my_state->in_id = NULL; my_state->out_a = NULL; my_state->in_a = NULL; my_state->out_states = 0; my_state->in_states = 0; my_state->fix = 0; #undef CUR_PROC} /* state_clean *//*============================================================================*//*state* state_copy(state *my_state) { state* new_state = (state*) malloc(sizeof(state)); state_copy_to(my_state,new_state); return new_state; }*/ /* state_copy *//*============================================================================*//*void state_copy_to(state *source, state* dest) { dest->pi = source->pi; dest->out_states = source->out_states; dest->in_states = source->in_states; dest->fix = source->fix; dest->b = malloc(xxx); memcpy(dest->b,source->b,xxx); dest->out_id = malloc(xxx); memcpy(dest->out_id,source->out_id,xxx); dest->in_id = malloc(xxx); memcpy(dest->in_id,source->in_id,xxx); dest->out_a = malloc(xxx); memcpy(dest->out_a,source->out_a,xxx); dest->in_a = malloc(xxx); memcpy(dest->in_a,source->in_a,xxx); }*/ /* state_copy_to */ /*==========================Labeled HMMS ================================*/ sequence_t *model_label_generate_sequences(model* mo, int seed, int global_len, long seq_number, int Tmax) {# define CUR_PROC "model_label_generate_sequences" /* An end state is characterized by not having an output probabiliy. */ sequence_t *sq = NULL; int i, j, m,temp; double p, sum; int len = global_len; //int silent_len = 0; int n=0; int incomplete_seq = 0; int state=0; int label_index = 0; sq = sequence_calloc(seq_number); if (!sq) { mes_proc(); goto STOP; } /* allocating additional fields for the labels in the sequence_t struct */ if(!m_calloc(sq->state_labels, seq_number)) {mes_proc(); goto STOP;} if(!m_calloc(sq->state_labels_len, seq_number)) {mes_proc(); goto STOP;} if (len <= 0) /* A specific length of the sequences isn't given. As a model should have an end state, the konstant MAX_SEQ_LEN is used. */ len = (int)MAX_SEQ_LEN; if (seed > 0) { gsl_rng_set(RNG,seed); } while (n < seq_number) { //printf("sequenz n = %d\n",n); //printf("incomplete_seq: %d\n",incomplete_seq); if (incomplete_seq == 0) { if(!m_calloc(sq->seq[n], len)) {mes_proc(); goto STOP;} if (mo->model_type == kSilentStates){ printf("Model has silent states. \n"); /* for silent models we have to allocate for the maximal possible number of lables*/ if(!m_calloc(sq->state_labels[n], len * mo->N)) {mes_proc(); goto STOP;} } else { printf("Model has no silent states. \n"); if(!m_calloc(sq->state_labels[n], len)) {mes_proc(); goto STOP;} } label_index = 0; state = 0; } /* Get a random initial state i */ p = gsl_rng_uniform(RNG); sum = 0.0; for (i = 0; i < mo->N; i++) { sum += mo->s[i].pi; if (sum >= p) break; } /* add label of fist state to the label list */ sq->state_labels[n][label_index] = mo->s[i].label; label_index++; if ( mo->model_type == kSilentStates ){ if (!mo->silent[i]) { /* first state emits */ //printf("first state %d not silent\n",i); /* Get a random initial output m */ p = gsl_rng_uniform(RNG); sum = 0.0; for (m = 0; m < mo->M; m++) { sum += mo->s[i].b[m]; if (sum >= p) break; } sq->seq[n][state] = m; state = state+1; } else { /* silent state: we do nothing, no output */ //printf("first state %d silent\n",i); //state = 0; //silent_len = silent_len + 1; } } else { /* Get a random initial output m */ p = gsl_rng_uniform(RNG); sum = 0.0; for (m = 0; m < mo->M; m++) { sum += mo->s[i].b[m]; if (sum >= p) break; } sq->seq[n][state] = m; state = state + 1; } /* check whether sequence was completed by inital state*/ if (state >= len && incomplete_seq == 1){ //printf("assinging length %d to sequence %d\n",state,n); //printf("sequence complete...\n"); sq->seq_len[n] = state; sq->state_labels_len[n] = label_index; printf("1: seq %d -> %d labels\n",n,sq->state_labels_len[n]); if (mo->model_type == kSilentStates){ printf("reallocating\n"); if (m_realloc(sq->state_labels[n], sq->state_labels_len[n])){mes_proc(); goto STOP;} } incomplete_seq = 0; n++; continue; } while (state < len) { /* Get a random state i */ p = gsl_rng_uniform(RNG); sum = 0.0; for (j = 0; j < mo->s[i].out_states; j++) { sum += mo->s[i].out_a[j]; if (sum >= p) break; } i = mo->s[i].out_id[j]; /* add label of state to the label list */ sq->state_labels[n][label_index] = mo->s[i].label; label_index++; //printf("state %d selected (i: %d, j: %d) at position %d\n",mo->s[i].out_id[j],i,j,state); if (sum == 0.0) { if (state < len) { incomplete_seq = 1; //printf("final state reached - aborting\n"); break; } else { n++; break; } } if (mo->model_type == kSilentStates && mo->silent[i]) { /* Got a silent state i */ //printf("silent state \n"); //silent_len += 1; /*if (silent_len >= Tmax) { printf("%d silent states reached -> silent circle - aborting...\n",silent_len); incomplete_seq = 0; sq->seq_len[n] = state; n++; break; }*/ } else { /* Get a random output m from state i */ p = gsl_rng_uniform(RNG); sum = 0.0; for (m = 0; m < mo->M; m++) { sum += mo->s[i].b[m]; if (sum >= p) break; } sq->seq[n][state] = m; state++; } if (state == len ){ incomplete_seq = 0; sq->state_labels_len[n] = label_index; printf("2: seq %d -> %d labels\n",n,sq->state_labels_len[n]); if (mo->model_type == kSilentStates){ printf("reallocating\n"); if (m_realloc(sq->state_labels[n], sq->state_labels_len[n])){mes_proc(); goto STOP;} } sq->seq_len[n] = state; n++; } } /* while (state < len) */ } /* while( n < seq_number )*/return(sq);STOP: sequence_free(&sq); return(NULL);# undef CUR_PROC} /* data */ /*============================================================================*/ /*===================== E n d o f f i l e "model.c" ===============*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -