⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hmmsearch.c

📁 马尔科夫模型的java版本实现
💻 C
📖 第 1 页 / 共 5 页
字号:
					   NO, NO, normalize, scoring_method,					   NO, multi_scoring_method, aa_freqs, aa_freqs_2, aa_freqs_3, aa_freqs_4,					   NO);	  }	  if(verbose == YES) {	    printf("retraining done\n");	  }	}	  		/* get score info for this seq and print it to outfile */	if(verbose == YES) {	  printf("scoring ...\n");	}	get_scores(run_max_d, run_forward, run_backward, run_viterbi, run_n_best, seq_format, prf_mode,		   use_gap_shares, use_prior, use_labels, normalize, scoring_method, multi_scoring_method,		   print_labels, print_post_probs, print_log_like, print_log_odds, print_reversi, print_path,		   outfile);	if(verbose == YES) {	  printf("scoring done\n");	}		/* deallocate retrain_hmm */	if(run_max_d == YES) {	  hmm_garbage_collection_multi_no_dirichlet(NULL, &retrain_hmm);	}		/* deallocate seqinfo */	if(seq_format == STANDARD || seq_format == FASTA) {	  free(((seq_info.seqs))->seq_1);	  if(hmm.nr_alphabets > 1) {	    free((seq_info.seqs)->seq_2);	  }	  if(hmm.nr_alphabets > 2) {	    free((seq_info.seqs)->seq_3);	  }	  if(hmm.nr_alphabets > 3) {	    free((seq_info.seqs)->seq_4);	  }	  free(seq_info.seqs);	}	if(seq_format == MSA_STANDARD || seq_format == PROFILE) {	  free((*(msa_seq_infop)).msa_seq_1);	  if(hmm.nr_alphabets > 1) {	    free((*(msa_seq_infop)).msa_seq_2);	  }	  if(hmm.nr_alphabets > 2) {	    free((*(msa_seq_infop)).msa_seq_3);	  }	  if(hmm.nr_alphabets > 3) {	    free((*(msa_seq_infop)).msa_seq_4);	  }	  free((*(msa_seq_infop)).gaps);	}	free(msa_seq_infop);	fclose(outfile);      }            /* deallocate hmm_info */      hmm_garbage_collection_multi(hmmfile, &hmm);    }  }  /* deallocate null model */  if(nullfile != NULL) {    if(null_model.nr_alphabets >= 1) {      free(null_model.emissions);    }    if(null_model.nr_alphabets >= 2) {      free(null_model.emissions_2);    }    if(null_model.nr_alphabets >= 3) {      free(null_model.emissions_3);    }	    if(null_model.nr_alphabets >= 4) {      free(null_model.emissions_4);    }    fclose(nullfile);  }    /* deallocate subst mtx */  if(substmtxfile != NULL) {    free(subst_mtxp);    if(hmm.nr_alphabets > 1) {      free(subst_mtxp_2);    }    if(hmm.nr_alphabets > 2) {      free(subst_mtxp_3);    }    if(hmm.nr_alphabets > 3) {      free(subst_mtxp_4);    }    fclose(substmtxfile);  }  /* deallocate freqs */  if(freqfile != NULL) {    free(aa_freqs);    if(hmm.nr_alphabets > 1) {      free(aa_freqs_2);    }    if(hmm.nr_alphabets > 2) {      free(aa_freqs_3);    }    if(hmm.nr_alphabets > 3) {      free(aa_freqs_4);    }    fclose(freqfile);  }  /* deallocate replacement letters */  if(replfile != NULL) {    if(replacement_letters.nr_rl_1 > 0) {      free(replacement_letters.letters_1);      free(replacement_letters.probs_1);    }    if(replacement_letters.nr_rl_2 > 0) {      free(replacement_letters.letters_2);      free(replacement_letters.probs_2);    }    if(replacement_letters.nr_rl_3 > 0) {      free(replacement_letters.letters_3);      free(replacement_letters.probs_3);    }    if(replacement_letters.nr_rl_4 > 0) {      free(replacement_letters.letters_4);      free(replacement_letters.probs_4);    }    fclose(replfile);  }}/*************************************************************************************************/void get_null_model_multi(FILE *nullfile){  int i,j;  char s[MAX_LINE];    if(nullfile != NULL) {    /* read nullfile */    while(1) {      if(fgets(s, MAX_LINE, nullfile) == NULL) {	printf("Could not read null model file\n");	exit(0);      }      if(s[0] == '#' || s[0] == '\n') {	continue;      }      else {	null_model.nr_alphabets = atoi(s);	break;      }    }    for(i = 0; i < null_model.nr_alphabets; i++) {      while(1) {	if(fgets(s, MAX_LINE, nullfile) == NULL) {	  printf("Could not read null model file\n");	  exit(0);	}	if(s[0] == '#' || s[0] == '\n') {	  continue;	}	else {	  switch(i) {	  case 0:	    null_model.a_size = atoi(s);	    null_model.emissions = (double*)malloc_or_die(null_model.a_size * sizeof(double));	    break;	  case 1:	    null_model.a_size_2 = atoi(s);	    null_model.emissions_2 = (double*)malloc_or_die(null_model.a_size_2 * sizeof(double));	    break;	  case 2:	    null_model.a_size_3 = atoi(s);	    null_model.emissions_3 = (double*)malloc_or_die(null_model.a_size_3 * sizeof(double));	    break;	  case 3:	    null_model.a_size_4 = atoi(s);	    null_model.emissions_4 = (double*)malloc_or_die(null_model.a_size_4 * sizeof(double));	    break;	  }	  break;	}      }      j = 0;      switch(i) {      case 0:	while(j < null_model.a_size) {	  if (fgets(s, MAX_LINE, nullfile) != NULL) {	    if(s[0] != '#' && s[0] != '\n') {	      *(null_model.emissions + j) = atof(s);	      j++;	    }	  }	  else {	    printf("Could not read null model file\n");	    exit(0);	  }	}	break;      case 1:	while(j < null_model.a_size_2) {	  if (fgets(s, MAX_LINE, nullfile) != NULL) {	    if(s[0] != '#' && s[0] != '\n') {	      *(null_model.emissions_2 + j) = atof(s);	      j++;	    }	  }	  else {	    printf("Could not read null model file\n");	    exit(0);	  }	}	break;	      case 2:	while(j < null_model.a_size_3) {	  if (fgets(s, MAX_LINE, nullfile) != NULL) {	    if(s[0] != '#' && s[0] != '\n') {	      *(null_model.emissions_3 + j) = atof(s);	      j++;	    }	  }	  else {	    printf("Could not read null model file\n");	    exit(0);	  }	}	break;      case 3:	while(j < null_model.a_size_4) {	  if (fgets(s, MAX_LINE, nullfile) != NULL) {	    if(s[0] != '#' && s[0] != '\n') {	      *(null_model.emissions_4 + j) = atof(s);	      j++;	    }	  }	  else {	    printf("Could not read null model file\n");	    exit(0);	  }	}	break;      }      }    while(1) {      if(fgets(s, MAX_LINE, nullfile) != NULL) {	if(s[0] != '#' && s[0] != '\n') {	  null_model.trans_prob = atof(s);	  break;	}      }      else {	printf("Could not read null model file\n");	exit(0);      }    }  }  else {    null_model.a_size = -1;    null_model.nr_alphabets = -1;  }}/*********************score methods******************************************/void get_scores(int run_max_d, int run_forward, int run_backward, int run_viterbi, int run_n_best,		int seq_format, int prf_mode,		int use_gap_shares, int use_prior, int use_labels, int normalize, int scoring_method,		int multi_scoring_method, int print_labels, int print_post_probs, int print_log_like,		int print_log_odds, int print_reversi, int print_path, FILE *outfile){  struct forward_s *forw_mtx;  struct viterbi_s *viterbi_mtx;  struct one_best_s *one_best_mtx;  struct backward_s *backw_mtx;  struct forward_s *rev_forw_mtx;  struct viterbi_s *rev_viterbi_mtx;  double *forw_scale, *rev_forw_scale, *scaling_factors;  char *labels;  int use_lead_columns;  struct letter_s *reverse_seq, *reverse_seq_2, *reverse_seq_3, *reverse_seq_4;  struct msa_sequences_multi_s reverse_msa_seq_info;  double nullmodel_score;  double norm_log_like, logodds, reversi;  int *path, *path_incrementable;  int seq_len;  double viterbi_score, forward_score, backward_score, one_best_score, rev_viterbi_score, rev_forward_score, raw_forward_score;  double *post_prob_matrix, *post_prob_label_matrix;  int label_nr;  int a,b;  int i,j,k;    struct hmm_multi_s *hmmp;  if(run_max_d == YES) {    hmmp = &retrain_hmm;  }  else {    hmmp = &hmm;  }  if(prf_mode == ALL_SEQS) {    use_lead_columns = NO;  }  else if(prf_mode == LEAD_SEQ) {    use_lead_columns = YES;  }  else {    printf("Internal error: strange profile mode\n");    exit(0);  }    /* get seq_length */  if(seq_format == STANDARD || seq_format == FASTA) {    seq_len = get_seq_length(seq_info.seqs->seq_1);  }  else if(seq_format == MSA_STANDARD || seq_format == PROFILE) {    if(use_lead_columns == YES) {      seq_len = msa_seq_infop->nr_lead_columns;    }    else {      seq_len = msa_seq_infop->msa_seq_length;    }  }  fprintf(outfile, "Seq length: %d\n", seq_len);    /* if needed run forward */  if(run_forward == YES) {    if(seq_format == STANDARD || seq_format == FASTA) {      forward_multi(hmmp, seq_info.seqs->seq_1, seq_info.seqs->seq_2, seq_info.seqs->seq_3, seq_info.seqs->seq_4,		    &forw_mtx, &forw_scale, use_labels, multi_scoring_method);      raw_forward_score = (forw_mtx + get_mtx_index(seq_len+1, hmmp->nr_v-1, hmmp->nr_v))->prob;      forward_score = log10((forw_mtx + get_mtx_index(seq_len+1, hmmp->nr_v-1, hmmp->nr_v))->prob);      for (j = seq_len; j > 0; j--) {	forward_score = forward_score + log10(*(forw_scale + j));      }    }    else if(seq_format == MSA_STANDARD || seq_format == PROFILE) {      msa_forward_multi(hmmp, msa_seq_infop, use_lead_columns, use_gap_shares, use_prior, &forw_mtx, &forw_scale,			use_labels, normalize,scoring_method, multi_scoring_method, aa_freqs, aa_freqs_2,			aa_freqs_3, aa_freqs_4);      raw_forward_score = (forw_mtx + get_mtx_index(seq_len+1, hmmp->nr_v-1, hmmp->nr_v))->prob;      forward_score = log10((forw_mtx + get_mtx_index(seq_len+1, hmmp->nr_v-1, hmmp->nr_v))->prob);      for (j = seq_len; j > 0; j--) {	forward_score = forward_score + log10(*(forw_scale + j));      }    }  }    /* if needed run backward */  if(run_backward == YES) {    if(seq_format == STANDARD || seq_format == FASTA) {      backward_multi(hmmp, seq_info.seqs->seq_1, seq_info.seqs->seq_2, seq_info.seqs->seq_3, seq_info.seqs->seq_4,		     &backw_mtx, forw_scale, use_labels, multi_scoring_method);      backward_score = log10((backw_mtx + get_mtx_index(0, 0, hmmp->nr_v))->prob);      for (j = seq_len; j > 0; j--) {	backward_score = backward_score + log10(*(forw_scale + j));      }    }    else if(seq_format == MSA_STANDARD || seq_format == PROFILE) {      msa_backward_multi(hmmp, msa_seq_infop, use_lead_columns, use_gap_shares, &backw_mtx, forw_scale, use_labels, normalize,			 scoring_method, multi_scoring_method, aa_freqs, aa_freqs_2, aa_freqs_3, aa_freqs_4);      backward_score = log10((backw_mtx + get_mtx_index(0, 0, hmmp->nr_v))->prob);      for (j = seq_len; j > 0; j--) {	backward_score = backward_score + log10(*(forw_scale + j));      }    }  }    /* if needed run viterbi */  if(run_viterbi == YES || print_path == YES) {    if(seq_format == STANDARD || seq_format == FASTA) {      viterbi_multi(hmmp, seq_info.seqs->seq_1, seq_info.seqs->seq_2, seq_info.seqs->seq_3, seq_info.seqs->seq_4,		    &viterbi_mtx, use_labels, multi_scoring_method);      viterbi_score = (viterbi_mtx + get_mtx_index(seq_len+1, hmmp->nr_v-1, hmmp->nr_v))->prob;      if(viterbi_score == DEFAULT) {	viterbi_score = log10(0.0);      }    }    else if(seq_format == MSA_STANDARD || seq_format == PROFILE) {      msa_viterbi_multi(hmmp, msa_seq_infop, use_lead_columns, use_gap_shares, use_prior, &viterbi_mtx, use_labels, normalize,			scoring_method, multi_scoring_method, aa_freqs, aa_freqs_2, aa_freqs_3, aa_freqs_4);      viterbi_score = (viterbi_mtx + get_mtx_index(seq_len+1, hmmp->nr_v-1, hmmp->nr_v))->prob;      if(viterbi_score == DEFAULT) {	viterbi_score = log10(0.0);      }    }  }    /* if needed run n-best */  if(run_n_best == YES) {    labels = (char*)malloc_or_die((seq_len + 1) * sizeof(char));    if(seq_format == STANDARD || seq_format == FASTA) {      one_best_multi(hmmp, seq_info.seqs->seq_1, seq_info.seqs->seq_2, seq_info.seqs->seq_3, seq_info.seqs->seq_4,		     &one_best_mtx, &scaling_factors, use_labels, labels, multi_scoring_method);      one_best_score = log10((one_best_mtx + get_mtx_index(seq_len+1, hmmp->nr_v-1, hmmp->nr_v))->prob);      for (j = seq_len; j > 0; j--) {	one_best_score = one_best_score + log10(*(scaling_factors + j));      }    }    else if(seq_format == MSA_STANDARD || seq_format == PROFILE) {      msa_one_best_multi(hmmp, msa_seq_infop, use_lead_columns, use_gap_shares, use_prior,			 &one_best_mtx, &scaling_factors, use_labels, labels,			 normalize, scoring_method, multi_scoring_method, aa_freqs, aa_freqs_2, aa_freqs_3, aa_freqs_4);      one_best_score = log10((one_best_mtx + get_mtx_index(seq_len+1, hmmp->nr_v-1, hmmp->nr_v))->prob);      for (j = seq_len; j > 0; j--) {	one_best_score = one_best_score + log10(*(scaling_factors + j));      }    }  }  /* get log likelihod score + print + deallocate */  if(print_log_like == YES) {    if(run_viterbi == YES) {      norm_log_like = 0.0 - (viterbi_score/(double)(seq_len));    }    else if(run_forward == YES) {      norm_log_like = 0.0 - (forward_score/(double)(seq_len));    }    else {          }    /* print to outfile */    fprintf(outfile, "Normalized log likelihood: %.4f\n", norm_log_like);      }    /* get log odds score  + print + deallocate */  if(print_log_odds == YES) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -