📄 readhmm_multialpha.c
字号:
#include <stdio.h>#include <stdlib.h>#include <string.h>#include <math.h>#include "structs.h" /* data structures etc */#include "funcs.h" /* function header */#define MAX_LINE 500//#define DEBUG_RD//#define DEBUG_RDPRIextern int verbose;int read_module_multi(char*, FILE*, struct hmm_multi_s*, struct module_multi_s*, int*, int*);void check_probs_multi(struct hmm_multi_s*);void create_to_silent_trans_array_multi(struct hmm_multi_s *hmmp);void create_from_silent_trans_array_multi(struct hmm_multi_s *hmmp);void create_from_trans_array_multi(struct hmm_multi_s*);void create_to_trans_array_multi(struct hmm_multi_s*);void create_tot_transitions_multi(struct hmm_multi_s*);void create_tot_trans_arrays_multi(struct hmm_multi_s *hmmp);void add_all_from_paths_multi(int v, int w, struct hmm_multi_s *hmmp, struct path_element **from_transp, struct path_element *temp_pathp, int length);void add_all_to_paths_multi(int v, int w, struct hmm_multi_s *hmmp, struct path_element **from_transp, struct path_element *temp_pathp, int length);int read_prior_files_multi(int, struct emission_dirichlet_s*, int, FILE*);int read_trans_prior_files_multi(int, void*, struct hmm_multi_s*, FILE*);int silent_vertex_multi(int v, struct hmm_multi_s *hmmp);int readhmm_check(FILE *hmmfile) { char s[MAX_LINE]; int filetype; filetype = SINGLE_HMM; while(fgets(s, MAX_LINE, hmmfile) != NULL) { if(strncmp(s, "NR OF ALPHABETS:",16) == 0) { filetype = MULTI_HMM; break; } else if(strncmp(s, "ALPHABET LENGTH:", 16) == 0) { filetype = SINGLE_HMM; break; } } rewind(hmmfile); return filetype;}void transform_singlehmmfile_to_multi(FILE *hmmfile, FILE *outfile){ char s[MAX_LINE], s_2[MAX_LINE]; if(outfile == NULL || hmmfile == NULL) { printf("Could not transform singlehmm to multi\n"); exit(0); } while(1) { if(fgets(s, MAX_LINE, hmmfile) != NULL) { if(strncmp(s, "ALPHABET:", 9) == 0) { if(fputs("NR OF ALPHABETS: 1\n", outfile) == EOF) { perror(""); } if(fputs("ALPHABET 1:", outfile) == EOF) { perror(""); } if(fputs(&s[9], outfile) == EOF) { perror(""); } } else if(strncmp(s, "ALPHABET LENGTH:", 16) == 0) { if(fputs("ALPHABET LENGTH 1:", outfile) == EOF) { perror(""); } if(fputs(&s[16], outfile) == EOF) { perror(""); } } else if(strncmp(s, "NR OF EMISSION PRIORFILES:", 26) == 0) { if(fputs("NR OF EMISSION PRIORFILES 1:", outfile) == EOF) { perror(""); } if(fputs(&s[26], outfile) == EOF) { perror(""); } } else if(strncmp(s, "EMISSION PRIORFILES:", 20) == 0) { if(fputs("EMISSION PRIORFILES_1:", outfile) == EOF) { perror(""); } if(fputs(&s[20], outfile) == EOF) { perror(""); } } else if(strncmp(s, "Emission prior file:", 20) == 0) { if(fputs("Emission prior file 1:", outfile) == EOF) { perror(""); } if(fputs(&s[20], outfile) == EOF) { perror(""); } } else if(strncmp(s, "Emission prior scaler:", 22) == 0) { if(fputs("Emission prior scaler 1:", outfile) == EOF) { perror(""); } if(fputs(&s[22], outfile) == EOF) { perror(""); } } else if(strncmp(s, "Nr emissions =", 14) == 0) { if(fputs("Nr emissions 1 =", outfile) == EOF) { perror(""); } if(fputs(&s[14], outfile) == EOF) { perror(""); } } else if(strncmp(s, "Emission probabilities", 22) == 0) { if(fputs("Emission probabilities 1\n", outfile) == EOF) { perror(""); } } else { if(fputs(s, outfile) == EOF) { perror(""); } } } else { break; } }}void copy_hmm_struct(struct hmm_multi_s *hmmp, struct hmm_multi_s *retrain_hmmp){ memcpy(retrain_hmmp->name, hmmp->name, 100); /* name of the HMM */ retrain_hmmp->constr_t = NULL; /* time of construction */ retrain_hmmp->nr_alphabets = hmmp->nr_alphabets; memcpy(retrain_hmmp->alphabet, hmmp->alphabet, 1000); /* the alphabet */ memcpy(retrain_hmmp->alphabet_2, hmmp->alphabet_2, 1000); /* the alphabet */ memcpy(retrain_hmmp->alphabet_3, hmmp->alphabet_3, 1000); /* the alphabet */ memcpy(retrain_hmmp->alphabet_4, hmmp->alphabet_4, 1000); /* the alphabet */ retrain_hmmp->alphabet_type = hmmp->alphabet_type; retrain_hmmp->alphabet_type_2 = hmmp->alphabet_type_2; retrain_hmmp->alphabet_type_3 = hmmp->alphabet_type_3; retrain_hmmp->alphabet_type_4 = hmmp->alphabet_type_4; retrain_hmmp->a_size = hmmp->a_size; retrain_hmmp->a_size_2 = hmmp->a_size_2; retrain_hmmp->a_size_3 = hmmp->a_size_3; retrain_hmmp->a_size_4 = hmmp->a_size_4; retrain_hmmp->nr_m = hmmp->nr_m; retrain_hmmp->nr_v = hmmp->nr_v; retrain_hmmp->nr_t = hmmp->nr_t; retrain_hmmp->nr_d = hmmp->nr_d; retrain_hmmp->nr_dt = hmmp->nr_dt; retrain_hmmp->nr_ttg = hmmp->nr_ttg; retrain_hmmp->nr_tt = hmmp->nr_tt; retrain_hmmp->nr_ed = hmmp->nr_ed; retrain_hmmp->nr_ed_2 = hmmp->nr_ed_2; retrain_hmmp->nr_ed_3 = hmmp->nr_ed_3; retrain_hmmp->nr_ed_4 = hmmp->nr_ed_4; retrain_hmmp->nr_tp = hmmp->nr_tp; retrain_hmmp->startnode = hmmp->startnode; retrain_hmmp->endnode = hmmp->endnode; retrain_hmmp->replacement_letters = hmmp->replacement_letters; /* point to same struct */ retrain_hmmp->modules = (struct module_multi_s**)(malloc_or_die(hmmp->nr_m * sizeof(struct module_multi_s*) + hmmp->nr_m * sizeof(struct module_multi_s))); memcpy(retrain_hmmp->modules, hmmp->modules, hmmp->nr_m * sizeof(struct module_multi_s*) + hmmp->nr_m * sizeof(struct module_multi_s)); retrain_hmmp->silent_vertices = (int*)(malloc_or_die((hmmp->nr_v + 1) * sizeof(int))); retrain_hmmp->locked_vertices = (int*)(malloc_or_die((hmmp->nr_v + 1) * sizeof(int))); memcpy(retrain_hmmp->silent_vertices, hmmp->silent_vertices, (hmmp->nr_v + 1) * sizeof(int)); memcpy(retrain_hmmp->locked_vertices, hmmp->locked_vertices, (hmmp->nr_v + 1) * sizeof(int)); retrain_hmmp->vertex_labels = (char*)(malloc_or_die(hmmp->nr_v * sizeof(char))); memcpy(retrain_hmmp->vertex_labels, hmmp->vertex_labels, hmmp->nr_v * sizeof(char)); retrain_hmmp->labels = (char*)(malloc_or_die(hmmp->nr_v * sizeof(char))); memcpy(retrain_hmmp->labels, hmmp->labels, hmmp->nr_v * sizeof(char)); retrain_hmmp->nr_labels = hmmp->nr_labels; retrain_hmmp->vertex_trans_prior_scalers = (double*)(malloc_or_die(hmmp->nr_v * sizeof(double))); memcpy(retrain_hmmp->vertex_trans_prior_scalers, hmmp->vertex_trans_prior_scalers, hmmp->nr_v * sizeof(double)); retrain_hmmp->vertex_emiss_prior_scalers = (double*)(malloc_or_die(hmmp->nr_v * sizeof(double))); memcpy(retrain_hmmp->vertex_emiss_prior_scalers, hmmp->vertex_emiss_prior_scalers, hmmp->nr_v * sizeof(double)); if(hmmp->nr_alphabets > 1) { retrain_hmmp->vertex_emiss_prior_scalers_2 = (double*)(malloc_or_die(hmmp->nr_v * sizeof(double))); memcpy(retrain_hmmp->vertex_emiss_prior_scalers_2, hmmp->vertex_emiss_prior_scalers_2, hmmp->nr_v * sizeof(double)); } if(hmmp->nr_alphabets > 2) { retrain_hmmp->vertex_emiss_prior_scalers_3 = (double*)(malloc_or_die(hmmp->nr_v * sizeof(double))); memcpy(retrain_hmmp->vertex_emiss_prior_scalers_3, hmmp->vertex_emiss_prior_scalers_3, hmmp->nr_v * sizeof(double)); } if(hmmp->nr_alphabets > 3) { retrain_hmmp->vertex_emiss_prior_scalers_4 = (double*)(malloc_or_die(hmmp->nr_v * sizeof(double))); memcpy(retrain_hmmp->vertex_emiss_prior_scalers_4, hmmp->vertex_emiss_prior_scalers_4, hmmp->nr_v * sizeof(double)); } retrain_hmmp->transitions = (double*)(malloc_or_die(hmmp->nr_v * hmmp->nr_v * sizeof(double))); retrain_hmmp->log_transitions = (double*)(malloc_or_die(hmmp->nr_v * hmmp->nr_v * sizeof(double))); memcpy(retrain_hmmp->transitions, hmmp->transitions, hmmp->nr_v * hmmp->nr_v * sizeof(double)); memcpy(retrain_hmmp->log_transitions, hmmp->log_transitions, hmmp->nr_v * hmmp->nr_v * sizeof(double)); retrain_hmmp->emissions = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size * sizeof(double))); retrain_hmmp->log_emissions = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size * sizeof(double))); memcpy(retrain_hmmp->emissions, hmmp->emissions, hmmp->nr_v * hmmp->a_size * sizeof(double)); memcpy(retrain_hmmp->log_emissions, hmmp->log_emissions, hmmp->nr_v * hmmp->a_size * sizeof(double)); if(hmmp->nr_alphabets > 1) { retrain_hmmp->emissions_2 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_2 * sizeof(double))); retrain_hmmp->log_emissions_2 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_2 * sizeof(double))); memcpy(retrain_hmmp->emissions_2, hmmp->emissions_2, hmmp->nr_v * hmmp->a_size * sizeof(double)); memcpy(retrain_hmmp->log_emissions_2, hmmp->log_emissions_2, hmmp->nr_v * hmmp->a_size * sizeof(double)); } if(hmmp->nr_alphabets > 2) { retrain_hmmp->emissions_3 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_3 * sizeof(double))); retrain_hmmp->log_emissions_3 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_3 * sizeof(double))); memcpy(retrain_hmmp->emissions_3, hmmp->emissions_3, hmmp->nr_v * hmmp->a_size * sizeof(double)); memcpy(retrain_hmmp->log_emissions_3, hmmp->log_emissions_3, hmmp->nr_v * hmmp->a_size * sizeof(double)); } if(hmmp->nr_alphabets > 3) { retrain_hmmp->emissions_4 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_4 * sizeof(double))); retrain_hmmp->log_emissions_4 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_4 * sizeof(double))); memcpy(retrain_hmmp->emissions_4, hmmp->emissions_4, hmmp->nr_v * hmmp->a_size * sizeof(double)); memcpy(retrain_hmmp->log_emissions_4, hmmp->log_emissions_4, hmmp->nr_v * hmmp->a_size * sizeof(double)); } /* create to_silent_trans_array */ create_to_silent_trans_array_multi(retrain_hmmp); /* create from_trans_array */ create_from_trans_array_multi(retrain_hmmp); /* create to_trans_array */ create_to_trans_array_multi(retrain_hmmp); /* create tot_transitions */ create_tot_transitions_multi(retrain_hmmp); /* create tot_to_trans_array and tot_from_trans_arrays */ create_tot_trans_arrays_multi(retrain_hmmp); /* data structures */ retrain_hmmp->distrib_groups = (int*)(malloc_or_die((hmmp->nr_d + hmmp->nr_v) * sizeof(int))); memcpy(retrain_hmmp->distrib_groups, hmmp->distrib_groups,(hmmp->nr_d + hmmp->nr_v) * sizeof(int)); retrain_hmmp->trans_tie_groups = (int*)(malloc_or_die((hmmp->nr_t + hmmp->nr_ttg) * sizeof(struct transition_s))); memcpy(retrain_hmmp->trans_tie_groups, hmmp->trans_tie_groups,(hmmp->nr_t + hmmp->nr_ttg) * sizeof(struct transition_s)); retrain_hmmp->emission_dirichlets = hmmp->emission_dirichlets; retrain_hmmp->ed_ps = hmmp->ed_ps; retrain_hmmp->emission_dirichlets_2 = hmmp->emission_dirichlets_2; retrain_hmmp->ed_ps_2 = hmmp->ed_ps_2; retrain_hmmp->emission_dirichlets_3 = hmmp->emission_dirichlets_3; retrain_hmmp->ed_ps_3 = hmmp->ed_ps_3; retrain_hmmp->emission_dirichlets_4 = hmmp->emission_dirichlets_4; retrain_hmmp->ed_ps_4 = hmmp->ed_ps_4; retrain_hmmp->subst_mtx = hmmp->subst_mtx; retrain_hmmp->subst_mtx_2 = hmmp->subst_mtx_2; retrain_hmmp->subst_mtx_3 = hmmp->subst_mtx_3; retrain_hmmp->subst_mtx_4 = hmmp->subst_mtx_4; }int readhmm_multialpha(FILE *file, struct hmm_multi_s *hmmp){ char s[MAX_LINE], *c; int i,j,k; int res; int **from_trans_array, **to_trans_array; int *from_trans, *to_trans, *cur; struct module_multi_s *module; struct emission_dirichlet_s *emission_priorsp, *emission_priorsp_2, *emission_priorsp_3, *emission_priorsp_4; void *transition_priorsp; int nr_priorfiles, nr_trans_priorfiles; int silent_counter, locked_counter; char *nr_trans_tiesp, *nr_distrib_tiesp; struct transition_s *trans_ties; struct transition_s trans; if(verbose == YES) { printf("reading hmm "); } /* read header */ if(fgets(s, MAX_LINE, file) != NULL) { } /* name */ if(fgets(s, MAX_LINE, file) != NULL) { strcpy(hmmp->name, &s[6]); if(verbose == YES) { printf("%s ... ", hmmp->name); fflush(stdout); } } /* creation time */ fgets(s, MAX_LINE, file); /* nr of alphabets */ if(fgets(s, MAX_LINE, file) != NULL) { hmmp->nr_alphabets = atoi(&s[17]); } if(hmmp->nr_alphabets < 1 || hmmp->nr_alphabets > 4) { printf("Wrong nr of alphabets\n"); exit(0); } /* alphabet 1 */ if(fgets(s, MAX_LINE, file) != NULL) { strcpy(hmmp->alphabet, &s[12]); } /* set alphabet to be = DISCRETE as default, this should be reset when reading the sequences */ hmmp->alphabet_type = DISCRETE; /* alphabet length 1 */ if(fgets(s, MAX_LINE, file) != NULL) { hmmp->a_size = atoi(&s[19]); } if(hmmp->nr_alphabets > 1) { /* alphabet 2 */ if(fgets(s, MAX_LINE, file) != NULL) { strcpy(hmmp->alphabet_2, &s[12]); } /* set alphabet to be = DISCRETE as default, this should be reset when reading the sequences */ hmmp->alphabet_type_2 = DISCRETE; /* alphabet length 2 */ if(fgets(s, MAX_LINE, file) != NULL) { hmmp->a_size_2 = atoi(&s[19]); } } if(hmmp->nr_alphabets > 2) { /* alphabet 3 */ if(fgets(s, MAX_LINE, file) != NULL) { strcpy(hmmp->alphabet_3, &s[12]); } /* set alphabet to be = DISCRETE as default, this should be reset when reading the sequences */ hmmp->alphabet_type_3 = DISCRETE; /* alphabet length 3 */ if(fgets(s, MAX_LINE, file) != NULL) { hmmp->a_size_3 = atoi(&s[19]); } } if(hmmp->nr_alphabets > 3) { /* alphabet 4 */ if(fgets(s, MAX_LINE, file) != NULL) { strcpy(hmmp->alphabet_4, &s[12]); } /* set alphabet to be = DISCRETE as default, this should be reset when reading the sequences */ hmmp->alphabet_type_4 = DISCRETE; /* alphabet length 4 */ if(fgets(s, MAX_LINE, file) != NULL) { hmmp->a_size_4 = atoi(&s[19]); } } /* nr of modules */ if(fgets(s, MAX_LINE, file) != NULL) { hmmp->nr_m = atoi(&s[15]); } /* nr of vertices */ if(fgets(s, MAX_LINE, file) != NULL) { hmmp->nr_v = atoi(&s[16]); hmmp->transitions = (double*)(malloc_or_die(hmmp->nr_v * hmmp->nr_v * sizeof(double))); hmmp->log_transitions = (double*)(malloc_or_die(hmmp->nr_v * hmmp->nr_v * sizeof(double))); init_float_mtx(hmmp->log_transitions, DEFAULT, hmmp->nr_v * hmmp->nr_v); hmmp->emissions = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size * sizeof(double))); hmmp->log_emissions = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size * sizeof(double))); init_float_mtx(hmmp->log_emissions, DEFAULT, hmmp->nr_v * hmmp->a_size); if(hmmp->nr_alphabets > 1) { hmmp->emissions_2 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_2 * sizeof(double))); hmmp->log_emissions_2 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_2 * sizeof(double))); init_float_mtx(hmmp->log_emissions_2, DEFAULT, hmmp->nr_v * hmmp->a_size_2); } if(hmmp->nr_alphabets > 2) { hmmp->emissions_3 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_3 * sizeof(double))); hmmp->log_emissions_3 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_3 * sizeof(double))); init_float_mtx(hmmp->log_emissions_3, DEFAULT, hmmp->nr_v * hmmp->a_size_3); } if(hmmp->nr_alphabets > 3) { hmmp->emissions_4 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_4 * sizeof(double))); hmmp->log_emissions_4 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_4 * sizeof(double))); init_float_mtx(hmmp->log_emissions_4, DEFAULT, hmmp->nr_v * hmmp->a_size_4); } hmmp->modules = (struct module_multi_s**)(malloc_or_die(hmmp->nr_m * sizeof(struct module_multi_s*) + hmmp->nr_m * sizeof(struct module_multi_s))); module = (struct module_multi_s*)(hmmp->modules + hmmp->nr_m); hmmp->silent_vertices = (int*)(malloc_or_die((hmmp->nr_v + 1) * sizeof(int))); hmmp->locked_vertices = (int*)(malloc_or_die((hmmp->nr_v + 1) * sizeof(int))); for(i = 0; i < hmmp->nr_v; i++) { *(hmmp->locked_vertices + i) = NO; } hmmp->vertex_labels = (char*)(malloc_or_die(hmmp->nr_v * sizeof(char))); hmmp->vertex_trans_prior_scalers = (double*)(malloc_or_die(hmmp->nr_v * sizeof(double))); hmmp->vertex_emiss_prior_scalers = (double*)(malloc_or_die(hmmp->nr_v * sizeof(double))); if(hmmp->nr_alphabets > 1) { hmmp->vertex_emiss_prior_scalers_2 = (double*)(malloc_or_die(hmmp->nr_v * sizeof(double))); } if(hmmp->nr_alphabets > 2) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -