📄 readhmm_multialpha.c
字号:
#ifdef DEBUG_RD printf("adding path: ");#endif /* direct path to w, add total path */ for(i = 0; i < length; i++) { p_el.vertex = (temp_pathp + i)->vertex;#ifdef DEBUG_RD printf("%d ", p_el.vertex);#endif p_el.next = (*from_transp) + 1; memcpy(*from_transp, &p_el, sizeof(struct path_element)); (*from_transp)++; } memcpy(*from_transp, &cur_p_el, sizeof(struct path_element));#ifdef DEBUG_RD printf("%d %d\n", cur_p_el.vertex, w);#endif (*from_transp)++; } xp = *(hmmp->to_silent_trans_array + v); while(*xp != END) { add_all_from_paths_multi(*xp, w, hmmp, from_transp, temp_pathp, length + 1); xp++; }}/* Go through transmission matrix and get all probabilities that are not 0 * into to_trans_array */void create_to_trans_array_multi(struct hmm_multi_s *hmmp){ int v,w,*xp; int has_to_trans; int array_head_size, array_tail_size; struct path_element **to_trans_array, *to_trans, *temp_path; array_tail_size = 0; array_head_size = hmmp->nr_v; /* estimate how much space we need to store transitions */ array_tail_size = (hmmp->nr_t/hmmp->nr_v + 3 + MAX_GAP_SIZE) * MAX_GAP_SIZE/2 * hmmp->nr_v;#ifdef DEBUG_RD printf("array_tail_size = %d\n", array_tail_size);#endif to_trans_array = (struct path_element**) (malloc_or_die(array_head_size * sizeof(struct path_element*) + (array_tail_size + hmmp->nr_v) * sizeof(struct path_element))); to_trans = (struct path_element*)(to_trans_array + hmmp->nr_v); hmmp->to_trans_array = to_trans_array; /* find all paths and add them to to_trans_array */ for(v = 0; v < hmmp->nr_v; v++) /* from-vertex */ { *to_trans_array = to_trans; if(silent_vertex_multi(v, hmmp) == YES) { to_trans->vertex = END; to_trans->next = NULL; to_trans++; to_trans_array++; continue; } for(w = 0; w < hmmp->nr_v; w++) /* to-vertex */ { if(silent_vertex_multi(w,hmmp) == YES) { continue; } temp_path = (struct path_element*)(malloc_or_die(1000 * sizeof(struct path_element))); add_all_to_paths_multi(v, w, hmmp, &to_trans, temp_path, 0); free(temp_path); } to_trans->vertex = END; to_trans->next = NULL; to_trans++; to_trans_array++; } #ifdef DEBUG_RD printf("array_head_size, array_tail_size = %d, %d\n", array_head_size, array_tail_size); dump_to_trans_array(hmmp->nr_v, hmmp->to_trans_array);#endif }void add_all_to_paths_multi(int v, int w, struct hmm_multi_s *hmmp, struct path_element **to_transp, struct path_element *temp_pathp, int length){ int i,j; int *xp; struct path_element p_el; if(length > MAX_GAP_SIZE) { return; } if(*(hmmp->transitions + get_mtx_index(v, w, hmmp->nr_v)) != 0.0) { /* direct path to w, add total path */ for(i = 0; i < length; i++) { p_el.vertex = (temp_pathp + i)->vertex; p_el.next = (*to_transp) + 1; memcpy(*to_transp, &p_el, sizeof(struct path_element)); (*to_transp)++; } p_el.vertex = w; p_el.next = NULL; memcpy(*to_transp, &p_el, sizeof(struct path_element)); (*to_transp)++; } xp = *(hmmp->to_silent_trans_array + v); while(*xp != END) { (temp_pathp + length)->vertex = *xp; (temp_pathp + length)->next = NULL; add_all_to_paths_multi(*xp, w, hmmp, to_transp, temp_pathp, length + 1); xp++; } }int silent_vertex_multi(int k, struct hmm_multi_s *hmmp){#ifdef DEBUG_RD printf("startnode = %d\n",hmmp->startnode); printf("endnode = %d\n",hmmp->endnode); dump_silent_vertices_multi(hmmp); dump_emiss_matrix(hmmp->nr_v, hmmp->a_size, hmmp->emissions);#endif if(*(hmmp->emissions + get_mtx_index(k,0,hmmp->a_size)) == SILENT && k != hmmp->startnode && k != hmmp->endnode) { return YES; } else { return NO; }}/* check all probabilities and abort if some prob > 1.0 or < 0.0 */void check_probs_multi(struct hmm_multi_s *hmmp){ int i,j; double sum; double diff; double prob; /* transition probabilities first */ for(i = 0; i < hmmp->nr_v; i++) { sum = 0; for(j = 0; j < hmmp->nr_v; j++) { prob = *((hmmp->transitions) + (i*hmmp->nr_v) + j); if(prob > 1.0 || prob < 0.0) { printf("Illegal probabilities (prob < 0.0 or prob > 1.0)\n"); exit(-1); } else { sum += prob; } } diff = 1.0 - sum; /* maybe something about auto correcting the probabilities * will be implemented later */ } /* then emission probabilities */ for(i = 0; i < hmmp->nr_v; i++) { sum = 0; for(j = 0; j < hmmp->a_size; j++) { prob = *((hmmp->emissions) + (i*hmmp->a_size) + j); if((prob > 1.0 || prob < 0.0) && prob != SILENT) { printf("Illegal probabilities (prob < 0.0 or prob > 1.0)\n"); exit(-1); } else { sum += prob; } } diff = 1.0 - sum; /* maybe something about auto correcting the probabilities * will be implemented later */ }#ifdef DEBUG_RD //dump_trans_matrix(hmmp->nr_v, hmmp->nr_v, hmmp->transitions); //dump_trans_matrix(hmmp->nr_v, hmmp->nr_v, hmmp->log_transitions); //dump_emiss_matrix(hmmp->nr_v, hmmp->a_size, hmmp->emissions); //dump_emiss_matrix(hmmp->nr_v, hmmp->a_size, hmmp->log_emissions);#endif}int read_prior_files_multi(int nr_priorfiles, struct emission_dirichlet_s *emission_priorsp, int a_size, FILE *file){ int i,j,k; double q_value, alpha_value, alpha_sum, logbeta; char s[2048], *p; char ps[2048]; char *file_name; char *pri; FILE *priorfile; /* find \n sign and remove it */ if(fgets(s, 2048, file) != NULL) { p = s; while((p = strstr(p, "\n")) != NULL) { strncpy(p, " ", 1); } } /* read all before first filename */ strtok(s," "); if((file_name = strtok(NULL, " ")) == NULL) { /* done */ return 0; } for(i = 0; i < nr_priorfiles; i++) { /* open the priorfile */ if((file_name = strtok(NULL, " ")) == NULL) { /* done */ return 0; } else { if((priorfile = fopen(file_name,"r")) != NULL) { printf("Opened priorfile %s\n", file_name); } else { perror(file_name); return -1; } } /* put name i struct */ strcpy(emission_priorsp->name, file_name); /* put nr of components in struct */ if(fgets(ps, 2048, priorfile) != NULL) { } else { return -1; } while(*ps == '#') { if(fgets(ps, 2048, priorfile) != NULL) { } else { return -1; } } (emission_priorsp + i)->nr_components = atoi(&ps[0]); (emission_priorsp + i)->alphabet_size = a_size; /* allocate memory for arrays and matrix to this prior struct */ (emission_priorsp + i)->q_values = malloc_or_die((emission_priorsp + i)->nr_components * sizeof(double)); (emission_priorsp + i)->alpha_sums = malloc_or_die((emission_priorsp + i)->nr_components * sizeof(double)); (emission_priorsp + i)->logbeta_values = malloc_or_die((emission_priorsp + i)->nr_components * sizeof(double)); (emission_priorsp + i)->prior_values = malloc_or_die((emission_priorsp + i)->nr_components * a_size * sizeof(double)); for(j = 0; j < (emission_priorsp + i)->nr_components; j++) { /* put q-value in array, skip empty and comment lines */ while(1) { if(fgets(ps, 2048, priorfile) != NULL) { if(*ps == '#' || *ps == '\n') { } else { break; } } else { printf("Prior file has incorrect format\n"); } } q_value = atof(&ps[0]); *((emission_priorsp + i)->q_values + j) = q_value; #ifdef DEBUG_RDPRI printf("q_value = %f\n", *(((emission_priorsp + i)->q_values) + j));#endif /* put alpha-values of this component in matrix */ alpha_sum = 0.0; k = 0; if(fgets(ps, 2048, priorfile) != NULL) { } else { return -1; } while(*ps == '#') { if(fgets(ps, 2048, priorfile) != NULL) { } else { return -1; } } pri = &ps[0]; for(k = 0; k < a_size; k++) { alpha_value = strtod(pri, &pri); alpha_sum += alpha_value; *(((emission_priorsp + i)->prior_values) + get_mtx_index(j, k,a_size)) = alpha_value; } /* put sum of alphavalues in array */ *(((emission_priorsp + i)->alpha_sums) + j) = alpha_sum; /* calculate logB(alpha) for this component, store in array*/ logbeta = 0; for(k = 0; k < a_size; k++) { logbeta += lgamma(*(((emission_priorsp + i)->prior_values) + get_mtx_index(j, k, a_size))); #ifdef DEBUG_RDPRI printf("prior_value = %f\n", *(((emission_priorsp + i)->prior_values) + get_mtx_index(j, k, a_size))); printf("lgamma_value = %f\n", lgamma(*(((emission_priorsp + i)->prior_values) + get_mtx_index(j, k, a_size))));#endif } logbeta = logbeta - lgamma(*(((emission_priorsp + i)->alpha_sums) + j)); *(((emission_priorsp + i)->logbeta_values) + j) = logbeta; }#ifdef DEBUG_RDPRI dump_prior_struct(emission_priorsp + i);#endif /* some cleanup before continuing with next prior file */ fclose(priorfile); emission_priorsp++; } return 0;}int read_trans_prior_files_multi(int nr_priorfiles, void *emission_priorsp, struct hmm_multi_s *hmmp, FILE *file){ char s[2048]; /* not implemented yet */ if(fgets(s, 2048, file) != NULL) { } return 0;}void create_tot_transitions_multi(struct hmm_multi_s *hmmp){ int v,w; struct path_element *wp; double t_res; double log_t_res, cur_value; hmmp->tot_transitions = (double*)malloc_or_die(hmmp->nr_v * hmmp->nr_v * sizeof(double)); hmmp->max_log_transitions = (double*)malloc_or_die(hmmp->nr_v * hmmp->nr_v * sizeof(double)); init_float_mtx(hmmp->max_log_transitions, DEFAULT, hmmp->nr_v * hmmp->nr_v); for(v = 0; v < hmmp->nr_v; v++) { wp = *(hmmp->from_trans_array + v); while(wp->vertex != END) /* w = from-vertex */ { t_res = 1.0; w = wp->vertex; while(wp->next != NULL) { t_res = t_res * *((hmmp->transitions) + get_mtx_index(wp->vertex, (wp + 1)->vertex, hmmp->nr_v)); /* probability of transition from w to v via silent states */ wp++; } t_res = t_res * *((hmmp->transitions) + get_mtx_index(wp->vertex, v, hmmp->nr_v)); /* tot_transitions */ *(hmmp->tot_transitions + get_mtx_index(w,v,hmmp->nr_v)) += t_res; /* max_log_transitions */ if(t_res != 0.0) { log_t_res = log10(t_res); cur_value = *(hmmp->max_log_transitions + get_mtx_index(w,v,hmmp->nr_v)); if(cur_value == DEFAULT || log_t_res > cur_value) { *(hmmp->max_log_transitions + get_mtx_index(w,v,hmmp->nr_v)) = log_t_res; } } wp++; } }}void create_tot_trans_arrays_multi(struct hmm_multi_s *hmmp){ int v,w; struct path_element *p_elp; int malloc_size; malloc_size = 0; for(v = 0; v < hmmp->nr_v; v++) { for(w = 0; w < hmmp->nr_v; w++) { if(*(hmmp->tot_transitions + get_mtx_index(v,w,hmmp->nr_v)) != 0.0) { malloc_size++; } } } hmmp->tot_to_trans_array = (struct path_element**)malloc_or_die(hmmp->nr_v * sizeof(struct path_element*) + (malloc_size + hmmp->nr_v) * sizeof(struct path_element)); hmmp->tot_from_trans_array = (struct path_element**)malloc_or_die(hmmp->nr_v * sizeof(struct path_element*) + (malloc_size + hmmp->nr_v) * sizeof(struct path_element)); /* fill in tot_to_trans_array */ p_elp = (struct path_element*)(hmmp->tot_to_trans_array + hmmp->nr_v); for(v = 0; v < hmmp->nr_v; v++) { *(hmmp->tot_to_trans_array + v) = p_elp; for(w = 0; w < hmmp->nr_v; w++) { if(*(hmmp->tot_transitions + get_mtx_index(v,w,hmmp->nr_v)) != 0.0) { p_elp->vertex = w; p_elp->next = NULL; p_elp++; } } p_elp->vertex = END; p_elp->next = NULL; p_elp++; } /* fill in tot_from_trans_array */ p_elp = (struct path_element*)(hmmp->tot_from_trans_array + hmmp->nr_v); for(v = 0; v < hmmp->nr_v; v++) { *(hmmp->tot_from_trans_array + v) = p_elp; for(w = 0; w < hmmp->nr_v; w++) { if(*(hmmp->tot_transitions + get_mtx_index(w,v,hmmp->nr_v)) != 0.0) { p_elp->vertex = w; p_elp->next = NULL; p_elp++; } } p_elp->vertex = END; p_elp->next = NULL; p_elp++; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -