⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hmmtrain.c

📁 马尔科夫模型的java版本实现
💻 C
📖 第 1 页 / 共 2 页
字号:
    printf("Internal error: hmmfile not found, should have been detected earlier\n");  }  /* check if priors are present and if noprior option is set */  if(args_info.noprior_given) {    use_prior = NO;  }  else if(hmm.nr_ed > 0) {    use_prior = YES;  }    /* get replacement letters */  if(replfile != NULL) {    get_replacement_letters_multi(replfile, &replacement_letters);  }  else {    replacement_letters.nr_alphabets = 0;    replacement_letters.nr_rl_1 = 0;    replacement_letters.nr_rl_2 = 0;    replacement_letters.nr_rl_3 = 0;    replacement_letters.nr_rl_4 = 0;    replacement_letters.letters_1 = NULL;    replacement_letters.probs_1 = NULL;    replacement_letters.letters_2 = NULL;    replacement_letters.probs_2 = NULL;    replacement_letters.letters_3 = NULL;    replacement_letters.probs_3 = NULL;    replacement_letters.letters_4 = NULL;    replacement_letters.probs_4 = NULL;  }  hmm.replacement_letters = &replacement_letters;  /* get training seqences */  /* Note: memory for the sequences will be allocated in get_sequences method, but   * must be freed here */    /* count nr of sequences */  nr_seqs = 0;  while(fgets(seq_name, MAX_LINE, seqnamefile) != NULL) {    nr_seqs++;  }  rewind(seqnamefile);    /* allocate space for msa_seq_info structs */  if(seq_format == MSA_STANDARD || seq_format == PROFILE) {    msa_seq_infop = (struct msa_sequences_multi_s*)(malloc_or_die(nr_seqs * sizeof(struct msa_sequences_multi_s)));  }  else if(seq_format == FASTA || seq_format == STANDARD) {    seq_info.seqs = malloc_or_die(sizeof(struct sequence_multi_s) * nr_seqs);    seq_info.nr_alphabets = hmm.nr_alphabets;    seq_info.nr_seqs = nr_seqs;    seq_info.longest_seq = 0;    seq_info.shortest_seq = INT_MAX;    seq_info.avg_seq_len = 0;  }  /* read all the sequences */  seq_counter = 0;  nr_read_seqs = 0;  while(fgets(seq_name, MAX_LINE, seqnamefile) != NULL) {    seq_name[strlen(seq_name)-1] = '\0';    if((seqfile = fopen(seq_name, "r")) == NULL) {      perror(seq_name);      continue;    }        /* check sequence file for labels + check nolabel flag */    if(seqfile_has_labels(seqfile) == YES) {      use_labels = YES;    }    else {      use_labels = NO;    }    if(args_info.nolabels_given) {      use_labels = NO;    }    if(seq_format == FASTA) {      if(hmm.nr_alphabets > 1) {	printf("fasta is a one alphabet format only\n");	exit(0);      }      else {	get_sequence_fasta_multi(seqfile, &seq_info, nr_read_seqs);	hmm.alphabet_type = DISCRETE;	if(use_labels == YES) {	  get_labels_multi(seqfile, &seq_info, &hmm, nr_read_seqs);	}      }    }    else if(seq_format == STANDARD) {      get_sequence_std_multi(seqfile, &seq_info, &hmm, nr_read_seqs);      if(use_labels == YES) {	get_labels_multi(seqfile, &seq_info, &hmm, nr_read_seqs);      }    }    else if(seq_format == MSA_STANDARD) {      if(use_lead_columns == YES) {	get_sequences_msa_std_multi(seqfile, NULL, msa_seq_infop + seq_counter, &hmm,				    lead_seq, &replacement_letters);      }      else {	get_sequences_msa_std_multi(seqfile, NULL, msa_seq_infop + seq_counter, &hmm, -1, &replacement_letters);      }            /* get labels */      if(use_labels == YES) {	if(use_lead_columns == YES) {	  get_msa_labels_multi(seqfile, msa_seq_infop + seq_counter, &hmm);	}	else {	  get_msa_labels_all_columns_multi(seqfile, msa_seq_infop + seq_counter, &hmm);	}      }    }    else if(seq_format == PROFILE) {      get_sequences_msa_prf_multi(seqfile, NULL, msa_seq_infop + seq_counter, &hmm);    }    nr_read_seqs++;    seq_counter++;    fclose(seqfile);  }  if(seq_format == STANDARD || seq_format == FASTA) {    seq_info.avg_seq_len = ((int)(seq_info.avg_seq_len / seq_info.nr_seqs));    //dump_labeled_seqs_multi(&seq_info);  }  else if(seq_format == MSA_STANDARD || seq_format == PROFILE) {      }   /* do training */  if(nr_read_seqs > 0) {    if(training_method == BW_STD) {      /* do baum welch training */      if(seq_format == STANDARD || seq_format == FASTA) {	baum_welch_dirichlet_multi(&hmm, seq_info.seqs, seq_info.nr_seqs, annealing, use_labels, use_transition_pseudo_counts,				   use_emission_pseudo_counts, multi_scoring_method, use_prior);      }      else if(seq_format == MSA_STANDARD || seq_format == PROFILE) {	msa_baum_welch_dirichlet_multi(&hmm, msa_seq_infop, nr_seqs, annealing, use_gap_shares, use_lead_columns, use_labels,				       use_transition_pseudo_counts, use_emission_pseudo_counts, normalize, scoring_method,				       use_nr_occurences, multi_scoring_method, aa_freqs, aa_freqs_2, aa_freqs_3, aa_freqs_4,				       use_prior);      }    }    else if(training_method == CML_STD && use_labels == YES) {      /* do cml training */      if(seq_format == STANDARD || seq_format == FASTA) {	extended_baum_welch_dirichlet_multi(&hmm, seq_info.seqs, seq_info.nr_seqs, annealing, use_labels,					   use_transition_pseudo_counts,					   use_emission_pseudo_counts, multi_scoring_method, use_prior);      }      else if(seq_format == MSA_STANDARD || seq_format == PROFILE) {	extended_msa_baum_welch_dirichlet_multi(&hmm, msa_seq_infop, nr_seqs, annealing, use_gap_shares, use_lead_columns, use_labels,						use_transition_pseudo_counts, use_emission_pseudo_counts, normalize, scoring_method,						use_nr_occurences, multi_scoring_method, aa_freqs, aa_freqs_2, aa_freqs_3,						aa_freqs_4, use_prior);      }    }    else if(training_method == CML_STD && use_labels == NO) {      printf("Error: cannot do CML-training on unlabeled sequences\n");      exit(0);    }  }  else {    printf("Error: no sequences read: cannot train\n");    exit(0);  }     /* save trained hmm */  if(hmmfiletype == MULTI_HMM) {    if(outfile != NULL) {      savehmm_multialpha(outfile, &hmm);    }    else {      outfile = fopen("a.hmg", "w");      savehmm_multialpha(outfile, &hmm);    }  }  else if(hmmfiletype == SINGLE_HMM) {    if(outfile != NULL) {      savehmm(outfile, &hmm);    }    else {      outfile = fopen("a.hmg", "w");      savehmm(outfile, &hmm);    }  }  /* garbage collection */  if(replfile != NULL) {    if(replacement_letters.nr_rl_1 > 0) {      free(replacement_letters.letters_1);      free(replacement_letters.probs_1);    }    if(replacement_letters.nr_rl_2 > 0) {      free(replacement_letters.letters_2);      free(replacement_letters.probs_2);    }    if(replacement_letters.nr_rl_3 > 0) {      free(replacement_letters.letters_3);      free(replacement_letters.probs_3);    }    if(replacement_letters.nr_rl_4 > 0) {      free(replacement_letters.letters_4);      free(replacement_letters.probs_4);    }    fclose(replfile);  }    /* free allocated sequence memory for msa/profile and single seqs respectively */   if(seq_format == MSA_STANDARD || seq_format == PROFILE) {    for(i = 0; i < nr_read_seqs; i++) {      free((*(msa_seq_infop + i)).msa_seq_1);      if(hmm.nr_alphabets > 1) {	free((*(msa_seq_infop + i)).msa_seq_2);      }      if(hmm.nr_alphabets > 2) {	free((*(msa_seq_infop + i)).msa_seq_3);      }      if(hmm.nr_alphabets > 3) {	free((*(msa_seq_infop + i)).msa_seq_4);      }      free((*(msa_seq_infop + i)).gaps);      free((*(msa_seq_infop + i)).gap_shares);      free((*(msa_seq_infop + i)).lead_columns_start);    }    free(msa_seq_infop);  }  else if(seq_format == FASTA || seq_format == STANDARD) {    for(i = 0; i < nr_read_seqs; i++) {      free(((seq_info.seqs) + i)->seq_1);      if(hmm.nr_alphabets > 1) {	free((seq_info.seqs + i)->seq_2);      }      if(hmm.nr_alphabets > 2) {	free((seq_info.seqs + i)->seq_3);      }      if(hmm.nr_alphabets > 3) {	free((seq_info.seqs + i)->seq_4);      }    }    free(seq_info.seqs);  }    fclose(outfile);  if(substmtxfile != NULL) {    free(subst_mtxp);    if(hmm.nr_alphabets > 1) {      free(subst_mtxp_2);    }    if(hmm.nr_alphabets > 2) {      free(subst_mtxp_3);    }    if(hmm.nr_alphabets > 3) {      free(subst_mtxp_4);    }    fclose(substmtxfile);  }    if(freqfile != NULL) {    free(aa_freqs);    if(hmm.nr_alphabets > 1) {      free(aa_freqs_2);    }    if(hmm.nr_alphabets > 2) {      free(aa_freqs_3);    }    if(hmm.nr_alphabets > 3) {      free(aa_freqs_4);    }    fclose(freqfile);  }  hmm_garbage_collection_multi(hmmfile, &hmm);  return 0;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -