⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 modseqalign.c

📁 马尔科夫模型的java版本实现
💻 C
📖 第 1 页 / 共 3 页
字号:
  /* read hmm */  /* get hmm from file */  if(hmmfile != NULL) {    hmmfiletype = readhmm_check(hmmfile);    if(hmmfiletype == SINGLE_HMM) {      readhmm(hmmfile, &hmm);    }    else if(hmmfiletype == MULTI_HMM) {      readhmm_multialpha(hmmfile, &hmm);    }    hmm.subst_mtx = subst_mtxp;    hmm.subst_mtx_2 = subst_mtxp_2;    hmm.subst_mtx_3 = subst_mtxp_3;    hmm.subst_mtx_4 = subst_mtxp_4;  }  else {    /* cannot happen */    printf("Internal error: hmmfile not found, should have been detected earlier\n");  }  hmm.replacement_letters = &replacement_letters;    /* read template seq */  /* allocate space for msa_seq_info structs */  if(seq_format == MSA_STANDARD || seq_format == PROFILE) {    msa_seq_infop_template = (struct msa_sequences_multi_s*)(malloc_or_die(1 * sizeof(struct msa_sequences_multi_s)));  }  else if(seq_format == FASTA || seq_format == STANDARD) {    seq_info_template.seqs = malloc_or_die(sizeof(struct sequence_multi_s) * 1);    seq_info_template.nr_alphabets = hmm.nr_alphabets;    seq_info_template.nr_seqs = 1;    seq_info_template.longest_seq = 0;    seq_info_template.shortest_seq = INT_MAX;    seq_info_template.avg_seq_len = 0;  }  /* check sequence file for labels + check nolabel flag */  if(seqfile_has_labels(template_seqfile) == YES) {    use_labels = YES;  }  else {    use_labels = NO;  }  if(args_info.nolabels_given) {    use_labels = NO;  }    if(seq_format == FASTA) {    if(hmm.nr_alphabets > 1) {      printf("fasta is a one alphabet format only\n");      exit(0);    }    else {      get_sequence_fasta_multi(template_seqfile, &seq_info_template, 0);      hmm.alphabet_type = DISCRETE;      if(use_labels == YES) {	get_labels_multi(template_seqfile, &seq_info_template, &hmm, 0);      }    }  }  else if(seq_format == STANDARD) {    get_sequence_std_multi(template_seqfile, &seq_info_template, &hmm, 0);    if(use_labels == YES) {      get_labels_multi(template_seqfile, &seq_info_template, &hmm, 0);      }  }  else if(seq_format == MSA_STANDARD) {    if(use_lead_columns == YES) {      get_sequences_msa_std_multi(template_seqfile, NULL, msa_seq_infop_template, &hmm,				  lead_seq, &replacement_letters);    }    else {      get_sequences_msa_std_multi(template_seqfile, NULL, msa_seq_infop_template, &hmm, -1, &replacement_letters);    }        /* get labels */    if(use_labels == YES) {      if(use_lead_columns == YES) {	get_msa_labels_multi(template_seqfile, msa_seq_infop_template, &hmm);      }      else {	get_msa_labels_all_columns_multi(template_seqfile, msa_seq_infop_template, &hmm);      }    }  }  else if(seq_format == PROFILE) {    get_sequences_msa_prf_multi(template_seqfile, NULL, msa_seq_infop_template, &hmm);  }  fclose(template_seqfile);  if(seq_format == STANDARD || seq_format == FASTA) {    seq_info_template.avg_seq_len = ((int)(seq_info_template.avg_seq_len / seq_info_template.nr_seqs));    //dump_labeled_seqs_multi(&seq_info_template);  }  else if(seq_format == MSA_STANDARD || seq_format == PROFILE) {      }    /* score template seq, get path */  /* get seq_length */  if(seq_format == STANDARD || seq_format == FASTA) {    seq_len_template = get_seq_length(seq_info_template.seqs->seq_1);  }  else if(seq_format == MSA_STANDARD || seq_format == PROFILE) {    if(use_lead_columns == YES) {      seq_len_template = msa_seq_infop_template->nr_lead_columns;    }    else {      seq_len_template = msa_seq_infop_template->msa_seq_length;    }  }  if(seq_format == STANDARD || seq_format == FASTA) {    viterbi_multi(&hmm, seq_info_template.seqs->seq_1, seq_info_template.seqs->seq_2, seq_info_template.seqs->seq_3,		  seq_info_template.seqs->seq_4,		  &viterbi_mtx, use_labels, multi_scoring_method);    viterbi_score = (viterbi_mtx + get_mtx_index(seq_len_template+1, hmm.nr_v-1, hmm.nr_v))->prob;    if(viterbi_score == DEFAULT) {      viterbi_score = log10(0.0);    }  }  else if(seq_format == MSA_STANDARD || seq_format == PROFILE) {    msa_viterbi_multi(&hmm, msa_seq_infop_template, use_lead_columns, use_gap_shares, use_prior, &viterbi_mtx, use_labels, normalize,		      scoring_method, multi_scoring_method, aa_freqs, aa_freqs_2, aa_freqs_3, aa_freqs_4);    viterbi_score = (viterbi_mtx + get_mtx_index(seq_len_template+1, hmm.nr_v-1, hmm.nr_v))->prob;    if(viterbi_score == DEFAULT) {      viterbi_score = log10(0.0);    }  }  if(seq_format == STANDARD || seq_format == FASTA) {    path_template = (int*)malloc_or_die((seq_len_template+1) * 2 * sizeof(int));    path_incrementable_template = path_template;    b = 0;    get_viterbi_path_multi(viterbi_mtx + get_mtx_index(seq_len_template+1,hmm.nr_v-1,hmm.nr_v), &hmm, viterbi_mtx,			   seq_len_template + 1, hmm.nr_v, path_incrementable_template, &b);  }    if(seq_format == MSA_STANDARD || seq_format == PROFILE) {    path_template = (int*)malloc_or_die((seq_len_template+1) * 2 * sizeof(int));    path_incrementable_template = path_template;      b = 0;      get_viterbi_path_multi(viterbi_mtx + get_mtx_index(seq_len_template+1,hmm.nr_v-1,hmm.nr_v), &hmm, viterbi_mtx,			     seq_len_template + 1, hmm.nr_v, path_incrementable_template, &b);  }    /* print*/#ifdef DEBUG_ALIGN  printf("Path:\n");  for(k = 0; k < seq_len_template; k++) {    if(k % 60 == 0 && k != 0) {      printf("\n");    }    printf("%d ", path_template[k]);  }  printf("\n");#endif    /* deallocate stuff */  free(viterbi_mtx);      /* read seq 2 */  /* allocate space for msa_seq_info structs */  if(seq_format == MSA_STANDARD || seq_format == PROFILE) {    msa_seq_infop_target = (struct msa_sequences_multi_s*)(malloc_or_die(1 * sizeof(struct msa_sequences_multi_s)));  }  else if(seq_format == FASTA || seq_format == STANDARD) {    seq_info_target.seqs = malloc_or_die(sizeof(struct sequence_multi_s) * 1);    seq_info_target.nr_alphabets = hmm.nr_alphabets;    seq_info_target.nr_seqs = 1;    seq_info_target.longest_seq = 0;    seq_info_target.shortest_seq = INT_MAX;    seq_info_target.avg_seq_len = 0;  }  /* check sequence file for labels + check nolabel flag */  if(seqfile_has_labels(target_seqfile) == YES) {    use_labels = YES;  }  else {    use_labels = NO;  }  if(args_info.nolabels_given) {    use_labels = NO;  }    if(seq_format == FASTA) {    if(hmm.nr_alphabets > 1) {      printf("fasta is a one alphabet format only\n");      exit(0);    }    else {      get_sequence_fasta_multi(target_seqfile, &seq_info_target, 0);      hmm.alphabet_type = DISCRETE;      if(use_labels == YES) {	get_labels_multi(target_seqfile, &seq_info_target, &hmm, 0);      }    }  }  else if(seq_format == STANDARD) {    get_sequence_std_multi(target_seqfile, &seq_info_target, &hmm, 0);    if(use_labels == YES) {      get_labels_multi(target_seqfile, &seq_info_target, &hmm, 0);      }  }  else if(seq_format == MSA_STANDARD) {    if(use_lead_columns == YES) {      get_sequences_msa_std_multi(target_seqfile, NULL, msa_seq_infop_target, &hmm,				  lead_seq, &replacement_letters);    }    else {      get_sequences_msa_std_multi(target_seqfile, NULL, msa_seq_infop_target, &hmm, -1, &replacement_letters);    }        /* get labels */    if(use_labels == YES) {      if(use_lead_columns == YES) {	get_msa_labels_multi(target_seqfile, msa_seq_infop_target, &hmm);      }      else {	get_msa_labels_all_columns_multi(target_seqfile, msa_seq_infop_target, &hmm);      }    }  }  else if(seq_format == PROFILE) {    get_sequences_msa_prf_multi(target_seqfile, NULL, msa_seq_infop_target, &hmm);  }  fclose(target_seqfile);  if(seq_format == STANDARD || seq_format == FASTA) {    seq_info_target.avg_seq_len = ((int)(seq_info_target.avg_seq_len / seq_info_target.nr_seqs));    //dump_labeled_seqs_multi(&seq_info_target);  }  else if(seq_format == MSA_STANDARD || seq_format == PROFILE) {      }    /* score seq 2, get path */  /* get seq_length */  if(seq_format == STANDARD || seq_format == FASTA) {    seq_len_target = get_seq_length(seq_info_target.seqs->seq_1);  }  else if(seq_format == MSA_STANDARD || seq_format == PROFILE) {    if(use_lead_columns == YES) {      seq_len_target = msa_seq_infop_target->nr_lead_columns;    }    else {      seq_len_target = msa_seq_infop_target->msa_seq_length;    }  }  if(seq_format == STANDARD || seq_format == FASTA) {    viterbi_multi(&hmm, seq_info_target.seqs->seq_1, seq_info_target.seqs->seq_2, seq_info_target.seqs->seq_3,		  seq_info_target.seqs->seq_4,		  &viterbi_mtx, use_labels, multi_scoring_method);    viterbi_score = (viterbi_mtx + get_mtx_index(seq_len_target+1, hmm.nr_v-1, hmm.nr_v))->prob;    if(viterbi_score == DEFAULT) {      viterbi_score = log10(0.0);    }  }  else if(seq_format == MSA_STANDARD || seq_format == PROFILE) {    msa_viterbi_multi(&hmm, msa_seq_infop_target, use_lead_columns, use_gap_shares, use_prior, &viterbi_mtx, use_labels, normalize,		      scoring_method, multi_scoring_method, aa_freqs, aa_freqs_2, aa_freqs_3, aa_freqs_4);    viterbi_score = (viterbi_mtx + get_mtx_index(seq_len_target+1, hmm.nr_v-1, hmm.nr_v))->prob;    if(viterbi_score == DEFAULT) {      viterbi_score = log10(0.0);    }  }  if(seq_format == STANDARD || seq_format == FASTA) {    path_target = (int*)malloc_or_die((seq_len_target+1) * 2 * sizeof(int));    path_incrementable_target = path_target;    b = 0;    get_viterbi_path_multi(viterbi_mtx + get_mtx_index(seq_len_target+1,hmm.nr_v-1,hmm.nr_v), &hmm, viterbi_mtx,			   seq_len_target + 1, hmm.nr_v, path_incrementable_target, &b);  }    if(seq_format == MSA_STANDARD || seq_format == PROFILE) {    path_target = (int*)malloc_or_die((seq_len_target+1) * 2 * sizeof(int));    path_incrementable_target = path_target;      b = 0;      get_viterbi_path_multi(viterbi_mtx + get_mtx_index(seq_len_target+1,hmm.nr_v-1,hmm.nr_v), &hmm, viterbi_mtx,			     seq_len_target + 1, hmm.nr_v, path_incrementable_target, &b);  }    /* print*/#ifdef DEBUG_ALIGN  printf("Path:\n");  for(k = 0; k < seq_len_target; k++) {    if(k % 60 == 0 && k != 0) {      printf("\n");    }    printf("%d ", path_target[k]);  }  printf("\n");#endif  /* deallocate stuff */  free(viterbi_mtx);  /* align paths so that maximum number of letters are emitted by the same state */    //get seq names and send along  fprintf(outfile, "Aligment based on same-state-emission\n");  fprintf(outfile, "Target file: %s\n", args_info.target_arg);  fprintf(outfile, "Template file: %s\n", args_info.template_arg);  align_paths(&path_template, &path_target, seq_len_target, seq_len_template, outfile, seq_format);    /* garbage collection */  if(replfile != NULL) {    if(replacement_letters.nr_rl_1 > 0) {      free(replacement_letters.letters_1);      free(replacement_letters.probs_1);    }    if(replacement_letters.nr_rl_2 > 0) {      free(replacement_letters.letters_2);      free(replacement_letters.probs_2);    }    if(replacement_letters.nr_rl_3 > 0) {      free(replacement_letters.letters_3);      free(replacement_letters.probs_3);    }    if(replacement_letters.nr_rl_4 > 0) {      free(replacement_letters.letters_4);      free(replacement_letters.probs_4);    }    fclose(replfile);  }    /* free allocated sequence memory for msa/profile and single seqs respectively */   if(seq_format == MSA_STANDARD || seq_format == PROFILE) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -