⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 readhmm_multialpha.c

📁 马尔科夫模型的java版本实现
💻 C
📖 第 1 页 / 共 4 页
字号:
#include <stdio.h>#include <stdlib.h>#include <string.h>#include <math.h>#include "structs.h" /* data structures etc */#include "funcs.h" /* function header */#define MAX_LINE 500//#define DEBUG_RD//#define DEBUG_RDPRIextern int verbose;int read_module_multi(char*, FILE*, struct hmm_multi_s*, struct module_multi_s*, int*, int*);void check_probs_multi(struct hmm_multi_s*);void create_to_silent_trans_array_multi(struct hmm_multi_s *hmmp);void create_from_silent_trans_array_multi(struct hmm_multi_s *hmmp);void create_from_trans_array_multi(struct hmm_multi_s*);void create_to_trans_array_multi(struct hmm_multi_s*);void create_tot_transitions_multi(struct hmm_multi_s*);void create_tot_trans_arrays_multi(struct hmm_multi_s *hmmp);void add_all_from_paths_multi(int v, int w, struct hmm_multi_s *hmmp,		   struct path_element **from_transp, struct path_element *temp_pathp, int length);void add_all_to_paths_multi(int v, int w, struct hmm_multi_s *hmmp,		   struct path_element **from_transp, struct path_element *temp_pathp, int length);int read_prior_files_multi(int, struct emission_dirichlet_s*, int, FILE*);int read_trans_prior_files_multi(int, void*, struct hmm_multi_s*, FILE*);int silent_vertex_multi(int v, struct hmm_multi_s *hmmp);int readhmm_check(FILE *hmmfile) {  char  s[MAX_LINE];  int filetype;  filetype = SINGLE_HMM;  while(fgets(s, MAX_LINE, hmmfile) != NULL) {    if(strncmp(s, "NR OF ALPHABETS:",16) == 0) {      filetype = MULTI_HMM;      break;    }    else if(strncmp(s, "ALPHABET LENGTH:", 16) == 0) {      filetype = SINGLE_HMM;      break;    }  }  rewind(hmmfile);  return filetype;}void transform_singlehmmfile_to_multi(FILE *hmmfile, FILE *outfile){  char s[MAX_LINE], s_2[MAX_LINE];  if(outfile == NULL || hmmfile == NULL) {    printf("Could not transform singlehmm to multi\n");    exit(0);  }  while(1) {    if(fgets(s, MAX_LINE, hmmfile) != NULL) {      if(strncmp(s, "ALPHABET:", 9) == 0) {	if(fputs("NR OF ALPHABETS: 1\n", outfile) == EOF) {	  perror("");	}	if(fputs("ALPHABET 1:", outfile) == EOF) {	  perror("");	}	if(fputs(&s[9], outfile) == EOF) {	  perror("");	}      }      else if(strncmp(s, "ALPHABET LENGTH:", 16) == 0) {	if(fputs("ALPHABET LENGTH 1:", outfile) == EOF) {	  perror("");	}	if(fputs(&s[16], outfile) == EOF) {	  perror("");	}      }      else if(strncmp(s, "NR OF EMISSION PRIORFILES:", 26) == 0) {	if(fputs("NR OF EMISSION PRIORFILES 1:", outfile) == EOF) {	  perror("");	}	if(fputs(&s[26], outfile) == EOF) {	  perror("");	}      }      else if(strncmp(s, "EMISSION PRIORFILES:", 20) == 0) {	if(fputs("EMISSION PRIORFILES_1:", outfile) == EOF) {	  perror("");	}	if(fputs(&s[20], outfile) == EOF) {	  perror("");	}      }      else if(strncmp(s, "Emission prior file:", 20) == 0) {	if(fputs("Emission prior file 1:", outfile) == EOF) {	  perror("");	}	if(fputs(&s[20], outfile) == EOF) {	  perror("");	}      }      else if(strncmp(s, "Emission prior scaler:", 22) == 0) {	if(fputs("Emission prior scaler 1:", outfile) == EOF) {	  perror("");	}	if(fputs(&s[22], outfile) == EOF) {	  perror("");	}      }      else if(strncmp(s, "Nr emissions =", 14) == 0) {	if(fputs("Nr emissions 1 =", outfile) == EOF) {	  perror("");	}	if(fputs(&s[14], outfile) == EOF) {	  perror("");	}      }      else if(strncmp(s, "Emission probabilities", 22) == 0) {	if(fputs("Emission probabilities 1\n", outfile) == EOF) {	  perror("");	}      }      else {	if(fputs(s, outfile) == EOF) {	  perror("");	}      }    }    else {      break;    }  }}void copy_hmm_struct(struct hmm_multi_s *hmmp, struct hmm_multi_s *retrain_hmmp){  memcpy(retrain_hmmp->name, hmmp->name, 100);  /* name of the HMM */  retrain_hmmp->constr_t = NULL; /* time of construction */  retrain_hmmp->nr_alphabets = hmmp->nr_alphabets;  memcpy(retrain_hmmp->alphabet, hmmp->alphabet, 1000); /* the alphabet */  memcpy(retrain_hmmp->alphabet_2, hmmp->alphabet_2, 1000); /* the alphabet */  memcpy(retrain_hmmp->alphabet_3, hmmp->alphabet_3, 1000); /* the alphabet */  memcpy(retrain_hmmp->alphabet_4, hmmp->alphabet_4, 1000); /* the alphabet */  retrain_hmmp->alphabet_type = hmmp->alphabet_type;  retrain_hmmp->alphabet_type_2 = hmmp->alphabet_type_2;  retrain_hmmp->alphabet_type_3 = hmmp->alphabet_type_3;  retrain_hmmp->alphabet_type_4 = hmmp->alphabet_type_4;  retrain_hmmp->a_size =  hmmp->a_size;  retrain_hmmp->a_size_2 =  hmmp->a_size_2;  retrain_hmmp->a_size_3 =  hmmp->a_size_3;  retrain_hmmp->a_size_4 =  hmmp->a_size_4;  retrain_hmmp->nr_m =  hmmp->nr_m;  retrain_hmmp->nr_v =  hmmp->nr_v;  retrain_hmmp->nr_t =  hmmp->nr_t;  retrain_hmmp->nr_d =  hmmp->nr_d;  retrain_hmmp->nr_dt =  hmmp->nr_dt;  retrain_hmmp->nr_ttg =  hmmp->nr_ttg;  retrain_hmmp->nr_tt =  hmmp->nr_tt;  retrain_hmmp->nr_ed =  hmmp->nr_ed;  retrain_hmmp->nr_ed_2 =  hmmp->nr_ed_2;  retrain_hmmp->nr_ed_3 =  hmmp->nr_ed_3;  retrain_hmmp->nr_ed_4 =  hmmp->nr_ed_4;  retrain_hmmp->nr_tp =  hmmp->nr_tp;  retrain_hmmp->startnode =  hmmp->startnode;  retrain_hmmp->endnode =  hmmp->endnode;   retrain_hmmp->replacement_letters =  hmmp->replacement_letters; /* point to same struct */    retrain_hmmp->modules = (struct module_multi_s**)(malloc_or_die(hmmp->nr_m * sizeof(struct module_multi_s*) +								  hmmp->nr_m * sizeof(struct module_multi_s)));  memcpy(retrain_hmmp->modules, hmmp->modules, hmmp->nr_m * sizeof(struct module_multi_s*) +	 hmmp->nr_m * sizeof(struct module_multi_s));      retrain_hmmp->silent_vertices = (int*)(malloc_or_die((hmmp->nr_v + 1) * sizeof(int)));  retrain_hmmp->locked_vertices = (int*)(malloc_or_die((hmmp->nr_v + 1) * sizeof(int)));  memcpy(retrain_hmmp->silent_vertices, hmmp->silent_vertices, (hmmp->nr_v + 1) * sizeof(int));  memcpy(retrain_hmmp->locked_vertices, hmmp->locked_vertices, (hmmp->nr_v + 1) * sizeof(int));    retrain_hmmp->vertex_labels = (char*)(malloc_or_die(hmmp->nr_v * sizeof(char)));  memcpy(retrain_hmmp->vertex_labels, hmmp->vertex_labels, hmmp->nr_v * sizeof(char));  retrain_hmmp->labels = (char*)(malloc_or_die(hmmp->nr_v * sizeof(char)));  memcpy(retrain_hmmp->labels, hmmp->labels, hmmp->nr_v * sizeof(char));  retrain_hmmp->nr_labels = hmmp->nr_labels;       retrain_hmmp->vertex_trans_prior_scalers = (double*)(malloc_or_die(hmmp->nr_v * sizeof(double)));  memcpy(retrain_hmmp->vertex_trans_prior_scalers, hmmp->vertex_trans_prior_scalers, hmmp->nr_v * sizeof(double));     retrain_hmmp->vertex_emiss_prior_scalers = (double*)(malloc_or_die(hmmp->nr_v * sizeof(double)));  memcpy(retrain_hmmp->vertex_emiss_prior_scalers, hmmp->vertex_emiss_prior_scalers, hmmp->nr_v * sizeof(double));  if(hmmp->nr_alphabets > 1) {    retrain_hmmp->vertex_emiss_prior_scalers_2 = (double*)(malloc_or_die(hmmp->nr_v * sizeof(double)));    memcpy(retrain_hmmp->vertex_emiss_prior_scalers_2, hmmp->vertex_emiss_prior_scalers_2, hmmp->nr_v * sizeof(double));      }  if(hmmp->nr_alphabets > 2) {    retrain_hmmp->vertex_emiss_prior_scalers_3 = (double*)(malloc_or_die(hmmp->nr_v * sizeof(double)));    memcpy(retrain_hmmp->vertex_emiss_prior_scalers_3, hmmp->vertex_emiss_prior_scalers_3, hmmp->nr_v * sizeof(double));   }  if(hmmp->nr_alphabets > 3) {    retrain_hmmp->vertex_emiss_prior_scalers_4 = (double*)(malloc_or_die(hmmp->nr_v * sizeof(double)));    memcpy(retrain_hmmp->vertex_emiss_prior_scalers_4, hmmp->vertex_emiss_prior_scalers_4, hmmp->nr_v * sizeof(double));  }    retrain_hmmp->transitions = (double*)(malloc_or_die(hmmp->nr_v * hmmp->nr_v * sizeof(double)));  retrain_hmmp->log_transitions = (double*)(malloc_or_die(hmmp->nr_v * hmmp->nr_v * sizeof(double)));  memcpy(retrain_hmmp->transitions, hmmp->transitions, hmmp->nr_v * hmmp->nr_v * sizeof(double));  memcpy(retrain_hmmp->log_transitions, hmmp->log_transitions, hmmp->nr_v * hmmp->nr_v * sizeof(double));    retrain_hmmp->emissions = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size * sizeof(double)));  retrain_hmmp->log_emissions = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size * sizeof(double)));  memcpy(retrain_hmmp->emissions, hmmp->emissions, hmmp->nr_v * hmmp->a_size * sizeof(double));  memcpy(retrain_hmmp->log_emissions, hmmp->log_emissions, hmmp->nr_v * hmmp->a_size * sizeof(double));  if(hmmp->nr_alphabets > 1) {    retrain_hmmp->emissions_2 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_2 * 							sizeof(double)));    retrain_hmmp->log_emissions_2 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_2 * 							    sizeof(double)));    memcpy(retrain_hmmp->emissions_2, hmmp->emissions_2, hmmp->nr_v * hmmp->a_size * sizeof(double));    memcpy(retrain_hmmp->log_emissions_2, hmmp->log_emissions_2, hmmp->nr_v * hmmp->a_size * sizeof(double));  }  if(hmmp->nr_alphabets > 2) {    retrain_hmmp->emissions_3 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_3 * 							sizeof(double)));    retrain_hmmp->log_emissions_3 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_3 * 							    sizeof(double)));    memcpy(retrain_hmmp->emissions_3, hmmp->emissions_3, hmmp->nr_v * hmmp->a_size * sizeof(double));    memcpy(retrain_hmmp->log_emissions_3, hmmp->log_emissions_3, hmmp->nr_v * hmmp->a_size * sizeof(double));  }  if(hmmp->nr_alphabets > 3) {    retrain_hmmp->emissions_4 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_4 * 							sizeof(double)));    retrain_hmmp->log_emissions_4 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_4 * 							    sizeof(double)));    memcpy(retrain_hmmp->emissions_4, hmmp->emissions_4, hmmp->nr_v * hmmp->a_size * sizeof(double));    memcpy(retrain_hmmp->log_emissions_4, hmmp->log_emissions_4, hmmp->nr_v * hmmp->a_size * sizeof(double));  }      /* create to_silent_trans_array */  create_to_silent_trans_array_multi(retrain_hmmp);    /* create from_trans_array */  create_from_trans_array_multi(retrain_hmmp);   /* create to_trans_array */  create_to_trans_array_multi(retrain_hmmp);   /* create tot_transitions */  create_tot_transitions_multi(retrain_hmmp);   /* create tot_to_trans_array and tot_from_trans_arrays */  create_tot_trans_arrays_multi(retrain_hmmp);  /* data structures */    retrain_hmmp->distrib_groups = (int*)(malloc_or_die((hmmp->nr_d + hmmp->nr_v) * sizeof(int)));  memcpy(retrain_hmmp->distrib_groups, hmmp->distrib_groups,(hmmp->nr_d + hmmp->nr_v) * sizeof(int));     retrain_hmmp->trans_tie_groups = (int*)(malloc_or_die((hmmp->nr_t + hmmp->nr_ttg) * sizeof(struct transition_s)));  memcpy(retrain_hmmp->trans_tie_groups, hmmp->trans_tie_groups,(hmmp->nr_t + hmmp->nr_ttg) * sizeof(struct transition_s));      retrain_hmmp->emission_dirichlets =  hmmp->emission_dirichlets;  retrain_hmmp->ed_ps = hmmp->ed_ps;  retrain_hmmp->emission_dirichlets_2 =  hmmp->emission_dirichlets_2;  retrain_hmmp->ed_ps_2 = hmmp->ed_ps_2;  retrain_hmmp->emission_dirichlets_3 =  hmmp->emission_dirichlets_3;  retrain_hmmp->ed_ps_3 = hmmp->ed_ps_3;  retrain_hmmp->emission_dirichlets_4 =  hmmp->emission_dirichlets_4;  retrain_hmmp->ed_ps_4 = hmmp->ed_ps_4;      retrain_hmmp->subst_mtx = hmmp->subst_mtx;  retrain_hmmp->subst_mtx_2 = hmmp->subst_mtx_2;  retrain_hmmp->subst_mtx_3 = hmmp->subst_mtx_3;  retrain_hmmp->subst_mtx_4 = hmmp->subst_mtx_4; }int readhmm_multialpha(FILE *file, struct hmm_multi_s *hmmp){  char  s[MAX_LINE], *c;  int i,j,k;  int res;  int **from_trans_array, **to_trans_array;  int *from_trans, *to_trans, *cur;  struct module_multi_s *module;  struct emission_dirichlet_s *emission_priorsp, *emission_priorsp_2, *emission_priorsp_3, *emission_priorsp_4;  void *transition_priorsp;  int nr_priorfiles, nr_trans_priorfiles;  int silent_counter, locked_counter;  char *nr_trans_tiesp, *nr_distrib_tiesp;  struct transition_s *trans_ties;  struct transition_s trans;  if(verbose == YES) {    printf("reading hmm ");  }  /* read header */  if(fgets(s, MAX_LINE, file) != NULL) {  }  /* name */  if(fgets(s, MAX_LINE, file) != NULL) {    strcpy(hmmp->name, &s[6]);    if(verbose == YES) {      printf("%s ... ", hmmp->name);      fflush(stdout);    }  }  /* creation time */  fgets(s, MAX_LINE, file);  /* nr of alphabets */  if(fgets(s, MAX_LINE, file) != NULL) {    hmmp->nr_alphabets = atoi(&s[17]);  }    if(hmmp->nr_alphabets < 1 || hmmp->nr_alphabets > 4) {    printf("Wrong nr of alphabets\n");    exit(0);  }  /* alphabet 1 */  if(fgets(s, MAX_LINE, file) != NULL) {    strcpy(hmmp->alphabet, &s[12]);  }    /* set alphabet to be = DISCRETE as default, this should be reset when reading the sequences */  hmmp->alphabet_type = DISCRETE;    /* alphabet length 1 */  if(fgets(s, MAX_LINE, file) != NULL) {    hmmp->a_size = atoi(&s[19]);  }  if(hmmp->nr_alphabets > 1) {    /* alphabet 2 */    if(fgets(s, MAX_LINE, file) != NULL) {      strcpy(hmmp->alphabet_2, &s[12]);    }    /* set alphabet to be = DISCRETE as default, this should be reset when reading the sequences */    hmmp->alphabet_type_2 = DISCRETE;    /* alphabet length 2 */    if(fgets(s, MAX_LINE, file) != NULL) {      hmmp->a_size_2 = atoi(&s[19]);    }  }  if(hmmp->nr_alphabets > 2) {    /* alphabet 3 */    if(fgets(s, MAX_LINE, file) != NULL) {      strcpy(hmmp->alphabet_3, &s[12]);    }    /* set alphabet to be = DISCRETE as default, this should be reset when reading the sequences */    hmmp->alphabet_type_3 = DISCRETE;    /* alphabet length 3 */    if(fgets(s, MAX_LINE, file) != NULL) {      hmmp->a_size_3 = atoi(&s[19]);    }  }  if(hmmp->nr_alphabets > 3) {    /* alphabet 4 */    if(fgets(s, MAX_LINE, file) != NULL) {      strcpy(hmmp->alphabet_4, &s[12]);    }    /* set alphabet to be = DISCRETE as default, this should be reset when reading the sequences */    hmmp->alphabet_type_4 = DISCRETE;    /* alphabet length 4 */    if(fgets(s, MAX_LINE, file) != NULL) {      hmmp->a_size_4 = atoi(&s[19]);    }  }  /* nr of modules */  if(fgets(s, MAX_LINE, file) != NULL) {    hmmp->nr_m = atoi(&s[15]);  }  /* nr of vertices */  if(fgets(s, MAX_LINE, file) != NULL) {    hmmp->nr_v = atoi(&s[16]);    hmmp->transitions = (double*)(malloc_or_die(hmmp->nr_v * hmmp->nr_v * 						sizeof(double)));    hmmp->log_transitions = (double*)(malloc_or_die(hmmp->nr_v * hmmp->nr_v * 						    sizeof(double)));    init_float_mtx(hmmp->log_transitions, DEFAULT, hmmp->nr_v * hmmp->nr_v);    hmmp->emissions = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size * 					      sizeof(double)));    hmmp->log_emissions = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size * 						  sizeof(double)));    init_float_mtx(hmmp->log_emissions, DEFAULT, hmmp->nr_v * hmmp->a_size);    if(hmmp->nr_alphabets > 1) {      hmmp->emissions_2 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_2 * 						  sizeof(double)));      hmmp->log_emissions_2 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_2 * 						      sizeof(double)));      init_float_mtx(hmmp->log_emissions_2, DEFAULT, hmmp->nr_v * hmmp->a_size_2);    }         if(hmmp->nr_alphabets > 2) {      hmmp->emissions_3 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_3 * 						sizeof(double)));      hmmp->log_emissions_3 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_3 * 						    sizeof(double)));      init_float_mtx(hmmp->log_emissions_3, DEFAULT, hmmp->nr_v * hmmp->a_size_3);    }    if(hmmp->nr_alphabets > 3) {      hmmp->emissions_4 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_4 * 						sizeof(double)));      hmmp->log_emissions_4 = (double*)(malloc_or_die(hmmp->nr_v * hmmp->a_size_4 * 						  sizeof(double)));      init_float_mtx(hmmp->log_emissions_4, DEFAULT, hmmp->nr_v * hmmp->a_size_4);    }        hmmp->modules = (struct module_multi_s**)(malloc_or_die(hmmp->nr_m * sizeof(struct module_multi_s*) +							    hmmp->nr_m * sizeof(struct module_multi_s)));    module = (struct module_multi_s*)(hmmp->modules + hmmp->nr_m);    hmmp->silent_vertices = (int*)(malloc_or_die((hmmp->nr_v + 1) * sizeof(int)));    hmmp->locked_vertices = (int*)(malloc_or_die((hmmp->nr_v + 1) * sizeof(int)));      for(i = 0; i < hmmp->nr_v; i++) {      *(hmmp->locked_vertices + i) = NO;    }    hmmp->vertex_labels = (char*)(malloc_or_die(hmmp->nr_v * sizeof(char)));    hmmp->vertex_trans_prior_scalers = (double*)(malloc_or_die(hmmp->nr_v * sizeof(double)));        hmmp->vertex_emiss_prior_scalers = (double*)(malloc_or_die(hmmp->nr_v * sizeof(double)));    if(hmmp->nr_alphabets > 1) {      hmmp->vertex_emiss_prior_scalers_2 = (double*)(malloc_or_die(hmmp->nr_v * sizeof(double)));    }    if(hmmp->nr_alphabets > 2) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -