📄 readhmm.c
字号:
#include <stdio.h>#include <stdlib.h>#include <string.h>#include <math.h>#include "structs.h" /* data structures etc */#include "funcs.h" /* function header */#define MAX_LINE 500//#define DEBUG_RD//#define DEBUG_RDPRIextern int verbose;int read_module(char*, FILE*, struct hmm_multi_s*, struct module_multi_s*, int*, int*);void check_probs(struct hmm_multi_s*);void create_to_silent_trans_array(struct hmm_multi_s *hmmp);void create_from_silent_trans_array(struct hmm_multi_s *hmmp);void create_from_trans_array(struct hmm_multi_s*);void create_to_trans_array(struct hmm_multi_s*);void create_tot_transitions(struct hmm_multi_s*);void create_tot_trans_arrays(struct hmm_multi_s *hmmp);void add_all_from_paths(int v, int w, struct hmm_multi_s *hmmp, struct path_element **from_transp, struct path_element *temp_pathp, int length);void add_all_to_paths(int v, int w, struct hmm_multi_s *hmmp, struct path_element **from_transp, struct path_element *temp_pathp, int length);int read_prior_files(int, struct emission_dirichlet_s*, struct hmm_multi_s*, FILE*);int read_trans_prior_files(int, void*, struct hmm_multi_s*, FILE*);int silent_vertex(int v, struct hmm_multi_s *hmmp);int readhmm(FILE *file, struct hmm_multi_s *hmmp){ char s[MAX_LINE], *c; int i,j; int res; int **from_trans_array, **to_trans_array; int *from_trans, *to_trans, *cur; struct module_multi_s *module; struct emission_dirichlet_s *emission_priorsp; void *transition_priorsp; int nr_priorfiles, nr_trans_priorfiles; int silent_counter, locked_counter; char *nr_trans_tiesp, *nr_distrib_tiesp; struct transition_s *trans_ties; struct transition_s trans; if(verbose == YES) { printf("reading hmm "); } /* set alphabet to be = DISCRETE as default, this should be reset when reading the sequences */ hmmp->alphabet_type = DISCRETE; /* read header */ if(fgets(s, MAX_LINE, file) != NULL) { } /* name */ if(fgets(s, MAX_LINE, file) != NULL) { strcpy(hmmp->name, &s[6]); if(verbose == YES) { printf("%s ... ", hmmp->name); fflush(stdout); } } /* creation time */ fgets(s, MAX_LINE, file); /* alphabet */ if(fgets(s, MAX_LINE, file) != NULL) { strcpy(hmmp->alphabet, &s[10]); hmmp->nr_alphabets = 1; } /* alphabet length */ if(fgets(s, MAX_LINE, file) != NULL) { hmmp->a_size = atoi(&s[17]); } /* nr of modules */ if(fgets(s, MAX_LINE, file) != NULL) { hmmp->nr_m = atoi(&s[15]); } /* nr of vertices */ if(fgets(s, MAX_LINE, file) != NULL) { hmmp->nr_v = atoi(&s[16]); hmmp->transitions = (double*)(malloc_or_die(hmmp->nr_v * hmmp->nr_v * sizeof(double))); hmmp->log_transitions = (double*)(malloc_or_die(hmmp->nr_v * hmmp->nr_v * sizeof(double))); init_float_mtx(hmmp->log_transitions, DEFAULT, hmmp->nr_v * hmmp->nr_v); hmmp->emissions = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size * sizeof(double))); hmmp->log_emissions = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size * sizeof(double))); init_float_mtx(hmmp->log_emissions, DEFAULT, hmmp->nr_v * hmmp->a_size); hmmp->modules = (struct module_multi_s**)(malloc_or_die(hmmp->nr_m * sizeof(struct module_multi_s*) + hmmp->nr_m * sizeof(struct module_multi_s))); module = (struct module_multi_s*)(hmmp->modules + hmmp->nr_m); hmmp->silent_vertices = (int*)(malloc_or_die((hmmp->nr_v + 1) * sizeof(int))); hmmp->locked_vertices = (int*)(malloc_or_die((hmmp->nr_v + 1) * sizeof(int))); for(i = 0; i < hmmp->nr_v; i++) { *(hmmp->locked_vertices + i) = NO; } hmmp->vertex_labels = (char*)(malloc_or_die(hmmp->nr_v * sizeof(char))); hmmp->vertex_trans_prior_scalers = (double*)(malloc_or_die(hmmp->nr_v * sizeof(double))); hmmp->vertex_emiss_prior_scalers = (double*)(malloc_or_die(hmmp->nr_v * sizeof(double))); } /* nr of transitions */ if(fgets(s, MAX_LINE, file) != NULL) { hmmp->nr_t = atoi(&s[19]); } /* nr of distribution groups */ if(fgets(s, MAX_LINE, file) != NULL) { hmmp->nr_d = atoi(&s[27]); hmmp->distrib_groups = (int*)(malloc_or_die((hmmp->nr_d + hmmp->nr_v) * sizeof(int))); } /* nr of trans tie groups */ if(fgets(s, MAX_LINE, file) != NULL) { hmmp->nr_ttg = atoi(&s[29]); hmmp->trans_tie_groups = (int*)(malloc_or_die((hmmp->nr_t + hmmp->nr_ttg) * sizeof(struct transition_s))); } /* nr of emission priorfiles */ if(fgets(s, MAX_LINE, file) != NULL) { nr_priorfiles = atoi(&s[27]); hmmp->nr_ed = nr_priorfiles; emission_priorsp = malloc_or_die(nr_priorfiles * sizeof(struct emission_dirichlet_s)); hmmp->emission_dirichlets = emission_priorsp; hmmp->ed_ps = malloc_or_die(hmmp->nr_v * sizeof(struct emission_dirichlet_s*)); } /* read the emission priorfiles */ if(read_prior_files(nr_priorfiles, emission_priorsp, hmmp, file) < 0) { printf("Could not read emission priorfiles\n"); exit(-1); } /* nr of transition priorfiles */ if(fgets(s, MAX_LINE, file) != NULL) { nr_trans_priorfiles = atoi(&s[29]); transition_priorsp = NULL; /* not implemented yet */ } /* read the transition priorfiles */ if(read_trans_prior_files(nr_trans_priorfiles, transition_priorsp, hmmp, file) < 0) { printf("Could not read transition priorfiles\n"); exit(-1); } /* empty row */ if(fgets(s, MAX_LINE, file) != NULL) { } /* empty row */ if(fgets(s, MAX_LINE, file) != NULL) { } /* reads ****************Modules*****************/ if(fgets(s, MAX_LINE, file) != NULL) { } /* read the modules */ silent_counter = 0; locked_counter = 0; for(i = 0; i < hmmp->nr_m; i++) { *(hmmp->modules + i) = module; if((res = read_module(s, file, hmmp, module, &silent_counter, &locked_counter)) < 0) { printf("Could not read modules\n"); exit(-1); } module++; } *(hmmp->silent_vertices + silent_counter) = END; *(hmmp->locked_vertices + hmmp->nr_v) = END;#ifdef DEBUG_RD //dump_locked_vertices(hmmp); //dump_silent_vertices(hmmp); //dump_modules(hmmp);#endif /* empty row */ if(fgets(s, MAX_LINE, file) != NULL) { } /* reads ****************Emission distribution groups*****************/ if(fgets(s, MAX_LINE, file) != NULL) { } /* read the distribution groups */ cur = hmmp->distrib_groups; for(i = 0; i < hmmp->nr_d; i++) { j = 0; if(fgets(s, MAX_LINE, file) != NULL) { while(1) { if(s[j] == ':') { break; } j++; } j++; j++; while(1) { *cur = atoi(&s[j]); cur++; while(s[j] != ' ' && s[j] != '\n') { j++; } while(s[j] == ' ') { j++; } if(s[j] == '\n') { break; } } *cur = END; cur++; } } /* empty row */ if(fgets(s, MAX_LINE, file) != NULL) { } /* reads ****************Transition tie groups*****************/ if(fgets(s, MAX_LINE, file) != NULL) { } /* read the trans tie groups */ trans_ties = hmmp->trans_tie_groups; for(i = 0; i < hmmp->nr_ttg; i++) { if(fgets(s, MAX_LINE, file) != NULL && s[0] != '\n') { j = 0; while(1) { if(s[j] == ':') { break; } j++; } j++; j++; while(1) { trans.from_v = atoi(&s[j]); while(s[j] != '>') { j++; } j++; trans.to_v = atoi(&s[j]); memcpy(trans_ties, &trans, sizeof(struct transition_s)); trans_ties++; while(s[j] != ' ' && s[j] != '\n') { j++; } while(s[j] == ' ') { j++; } if(s[j] == '\n') { break; } } trans.to_v = END; trans.from_v = END; memcpy(trans_ties, &trans, sizeof(struct transition_s)); trans_ties++; } else { hmmp->nr_ttg = i; break; } }#ifdef DEBUG_RD dump_distrib_groups(hmmp->distrib_groups, hmmp->nr_d); dump_trans_tie_groups(hmmp->trans_tie_groups, hmmp->nr_ttg);#endif /* create to_silent_trans_array */ create_to_silent_trans_array(hmmp); /* create from_trans_array */ create_from_trans_array(hmmp); /* create to_trans_array */ create_to_trans_array(hmmp); /* create tot_transitions */ create_tot_transitions(hmmp); /* create tot_to_trans_array and tot_from_trans_arrays*/ create_tot_trans_arrays(hmmp); /* get the set of labels and the number of labels */ get_set_of_labels_multi(hmmp); #ifdef DEBUG_RD dump_emiss_matrix(hmmp->nr_v, hmmp->a_size, hmmp->emissions); dump_trans_matrix(hmmp->nr_v, hmmp->nr_v, hmmp->transitions); printf("hmmp->emission_dirichlets = %x\n", hmmp->emission_dirichlets); for(i = 0; i < hmmp->nr_v; i++) { printf("hmmp->ed_ps for vertex %d = %x\n", i, *(hmmp->ed_ps + i)); }#endif /* make sure all probabilities are legal*/ //check_probs(hmmp); if(verbose == YES) { printf("done\n"); }}/************************read_module *************************************/int read_module(char *s, FILE *file, struct hmm_multi_s *hmmp, struct module_multi_s *modulep, int *silent_counter, int *locked_counter){ int nr_v, nr_t, nr_e, nr_et; int i,j,k; int from_v, to_v; double prob, log_prob; char type[50]; char prifile_name[500]; char *p, *probp; struct emission_dirichlet_s *priorsp; int silent_vertex; /* module name */ if(fgets(s, MAX_LINE, file) != NULL) { strcpy(modulep->name, &s[8]); } /* module type */ if(fgets(s, MAX_LINE, file) != NULL) { strcpy(type, &s[6]); if(strncmp(type, "Singlenode", 10) == 0) { modulep->type = SINGLENODE; } else if(strncmp(type, "Cluster", 7) == 0) { modulep->type = CLUSTER; } else if(strncmp(type, "Forward_std", 11) == 0) { modulep->type = FORWARD_STD; } else if(strncmp(type, "Forward_alt", 11) == 0) { modulep->type = FORWARD_ALT; } else if(strncmp(type, "Singleloop", 10) == 0) { modulep->type = SINGLELOOP; } else if(strncmp(type, "Profile7", 8) == 0) { modulep->type = PROFILE7; } else if(strncmp(type, "Profile9", 8) == 0) { modulep->type = PROFILE9; } else { printf("Error: module is of unknown type\n"); exit(-1); } } /* nr vertices */ if(fgets(s, MAX_LINE, file) != NULL) { modulep->nr_v = atoi(&s[12]); modulep->vertices = (int*)(malloc_or_die(modulep->nr_v * sizeof(int))); } /* emission prior file */ if(fgets(s, MAX_LINE, file) != NULL) { strcpy(prifile_name, (&s[21])); if((p = strstr(prifile_name, "\n")) != NULL) { *p = '\0'; } if(strncmp(prifile_name, "null", 4) == 0) { strcpy(modulep->priorfile_name, "null"); priorsp = NULL; } else { strcpy(modulep->priorfile_name, prifile_name); strcat(modulep->priorfile_name, "\0"); for(i = 0; i < hmmp->nr_ed; i++) { priorsp = (hmmp->emission_dirichlets + i); if((strncmp(prifile_name, priorsp->name, 200)) == 0) { /* keep this priorsp */ break; } else {#ifdef DEBUG_RD printf("prifile_name = %s\n", prifile_name); printf("priorsp->name = %s\n", priorsp->name);#endif } if(i == hmmp->nr_ed-1) /* no name equals priorfile_name */{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -