📄 hmmsearch.c
字号:
NO, NO, normalize, scoring_method, NO, multi_scoring_method, aa_freqs, aa_freqs_2, aa_freqs_3, aa_freqs_4, NO); } if(verbose == YES) { printf("retraining done\n"); } } /* get score info for this seq and print it to outfile */ if(verbose == YES) { printf("scoring ...\n"); } get_scores(run_max_d, run_forward, run_backward, run_viterbi, run_n_best, seq_format, prf_mode, use_gap_shares, use_prior, use_labels, normalize, scoring_method, multi_scoring_method, print_labels, print_post_probs, print_log_like, print_log_odds, print_reversi, print_path, outfile); if(verbose == YES) { printf("scoring done\n"); } /* deallocate retrain_hmm */ if(run_max_d == YES) { hmm_garbage_collection_multi_no_dirichlet(NULL, &retrain_hmm); } /* deallocate seqinfo */ if(seq_format == STANDARD || seq_format == FASTA) { free(((seq_info.seqs))->seq_1); if(hmm.nr_alphabets > 1) { free((seq_info.seqs)->seq_2); } if(hmm.nr_alphabets > 2) { free((seq_info.seqs)->seq_3); } if(hmm.nr_alphabets > 3) { free((seq_info.seqs)->seq_4); } free(seq_info.seqs); } if(seq_format == MSA_STANDARD || seq_format == PROFILE) { free((*(msa_seq_infop)).msa_seq_1); if(hmm.nr_alphabets > 1) { free((*(msa_seq_infop)).msa_seq_2); } if(hmm.nr_alphabets > 2) { free((*(msa_seq_infop)).msa_seq_3); } if(hmm.nr_alphabets > 3) { free((*(msa_seq_infop)).msa_seq_4); } free((*(msa_seq_infop)).gaps); } free(msa_seq_infop); fclose(outfile); } /* deallocate hmm_info */ hmm_garbage_collection_multi(hmmfile, &hmm); } } /* deallocate null model */ if(nullfile != NULL) { if(null_model.nr_alphabets >= 1) { free(null_model.emissions); } if(null_model.nr_alphabets >= 2) { free(null_model.emissions_2); } if(null_model.nr_alphabets >= 3) { free(null_model.emissions_3); } if(null_model.nr_alphabets >= 4) { free(null_model.emissions_4); } fclose(nullfile); } /* deallocate subst mtx */ if(substmtxfile != NULL) { free(subst_mtxp); if(hmm.nr_alphabets > 1) { free(subst_mtxp_2); } if(hmm.nr_alphabets > 2) { free(subst_mtxp_3); } if(hmm.nr_alphabets > 3) { free(subst_mtxp_4); } fclose(substmtxfile); } /* deallocate freqs */ if(freqfile != NULL) { free(aa_freqs); if(hmm.nr_alphabets > 1) { free(aa_freqs_2); } if(hmm.nr_alphabets > 2) { free(aa_freqs_3); } if(hmm.nr_alphabets > 3) { free(aa_freqs_4); } fclose(freqfile); } /* deallocate replacement letters */ if(replfile != NULL) { if(replacement_letters.nr_rl_1 > 0) { free(replacement_letters.letters_1); free(replacement_letters.probs_1); } if(replacement_letters.nr_rl_2 > 0) { free(replacement_letters.letters_2); free(replacement_letters.probs_2); } if(replacement_letters.nr_rl_3 > 0) { free(replacement_letters.letters_3); free(replacement_letters.probs_3); } if(replacement_letters.nr_rl_4 > 0) { free(replacement_letters.letters_4); free(replacement_letters.probs_4); } fclose(replfile); }}/*************************************************************************************************/void get_null_model_multi(FILE *nullfile){ int i,j; char s[MAX_LINE]; if(nullfile != NULL) { /* read nullfile */ while(1) { if(fgets(s, MAX_LINE, nullfile) == NULL) { printf("Could not read null model file\n"); exit(0); } if(s[0] == '#' || s[0] == '\n') { continue; } else { null_model.nr_alphabets = atoi(s); break; } } for(i = 0; i < null_model.nr_alphabets; i++) { while(1) { if(fgets(s, MAX_LINE, nullfile) == NULL) { printf("Could not read null model file\n"); exit(0); } if(s[0] == '#' || s[0] == '\n') { continue; } else { switch(i) { case 0: null_model.a_size = atoi(s); null_model.emissions = (double*)malloc_or_die(null_model.a_size * sizeof(double)); break; case 1: null_model.a_size_2 = atoi(s); null_model.emissions_2 = (double*)malloc_or_die(null_model.a_size_2 * sizeof(double)); break; case 2: null_model.a_size_3 = atoi(s); null_model.emissions_3 = (double*)malloc_or_die(null_model.a_size_3 * sizeof(double)); break; case 3: null_model.a_size_4 = atoi(s); null_model.emissions_4 = (double*)malloc_or_die(null_model.a_size_4 * sizeof(double)); break; } break; } } j = 0; switch(i) { case 0: while(j < null_model.a_size) { if (fgets(s, MAX_LINE, nullfile) != NULL) { if(s[0] != '#' && s[0] != '\n') { *(null_model.emissions + j) = atof(s); j++; } } else { printf("Could not read null model file\n"); exit(0); } } break; case 1: while(j < null_model.a_size_2) { if (fgets(s, MAX_LINE, nullfile) != NULL) { if(s[0] != '#' && s[0] != '\n') { *(null_model.emissions_2 + j) = atof(s); j++; } } else { printf("Could not read null model file\n"); exit(0); } } break; case 2: while(j < null_model.a_size_3) { if (fgets(s, MAX_LINE, nullfile) != NULL) { if(s[0] != '#' && s[0] != '\n') { *(null_model.emissions_3 + j) = atof(s); j++; } } else { printf("Could not read null model file\n"); exit(0); } } break; case 3: while(j < null_model.a_size_4) { if (fgets(s, MAX_LINE, nullfile) != NULL) { if(s[0] != '#' && s[0] != '\n') { *(null_model.emissions_4 + j) = atof(s); j++; } } else { printf("Could not read null model file\n"); exit(0); } } break; } } while(1) { if(fgets(s, MAX_LINE, nullfile) != NULL) { if(s[0] != '#' && s[0] != '\n') { null_model.trans_prob = atof(s); break; } } else { printf("Could not read null model file\n"); exit(0); } } } else { null_model.a_size = -1; null_model.nr_alphabets = -1; }}/*********************score methods******************************************/void get_scores(int run_max_d, int run_forward, int run_backward, int run_viterbi, int run_n_best, int seq_format, int prf_mode, int use_gap_shares, int use_prior, int use_labels, int normalize, int scoring_method, int multi_scoring_method, int print_labels, int print_post_probs, int print_log_like, int print_log_odds, int print_reversi, int print_path, FILE *outfile){ struct forward_s *forw_mtx; struct viterbi_s *viterbi_mtx; struct one_best_s *one_best_mtx; struct backward_s *backw_mtx; struct forward_s *rev_forw_mtx; struct viterbi_s *rev_viterbi_mtx; double *forw_scale, *rev_forw_scale, *scaling_factors; char *labels; int use_lead_columns; struct letter_s *reverse_seq, *reverse_seq_2, *reverse_seq_3, *reverse_seq_4; struct msa_sequences_multi_s reverse_msa_seq_info; double nullmodel_score; double norm_log_like, logodds, reversi; int *path, *path_incrementable; int seq_len; double viterbi_score, forward_score, backward_score, one_best_score, rev_viterbi_score, rev_forward_score, raw_forward_score; double *post_prob_matrix, *post_prob_label_matrix; int label_nr; int a,b; int i,j,k; struct hmm_multi_s *hmmp; if(run_max_d == YES) { hmmp = &retrain_hmm; } else { hmmp = &hmm; } if(prf_mode == ALL_SEQS) { use_lead_columns = NO; } else if(prf_mode == LEAD_SEQ) { use_lead_columns = YES; } else { printf("Internal error: strange profile mode\n"); exit(0); } /* get seq_length */ if(seq_format == STANDARD || seq_format == FASTA) { seq_len = get_seq_length(seq_info.seqs->seq_1); } else if(seq_format == MSA_STANDARD || seq_format == PROFILE) { if(use_lead_columns == YES) { seq_len = msa_seq_infop->nr_lead_columns; } else { seq_len = msa_seq_infop->msa_seq_length; } } fprintf(outfile, "Seq length: %d\n", seq_len); /* if needed run forward */ if(run_forward == YES) { if(seq_format == STANDARD || seq_format == FASTA) { forward_multi(hmmp, seq_info.seqs->seq_1, seq_info.seqs->seq_2, seq_info.seqs->seq_3, seq_info.seqs->seq_4, &forw_mtx, &forw_scale, use_labels, multi_scoring_method); raw_forward_score = (forw_mtx + get_mtx_index(seq_len+1, hmmp->nr_v-1, hmmp->nr_v))->prob; forward_score = log10((forw_mtx + get_mtx_index(seq_len+1, hmmp->nr_v-1, hmmp->nr_v))->prob); for (j = seq_len; j > 0; j--) { forward_score = forward_score + log10(*(forw_scale + j)); } } else if(seq_format == MSA_STANDARD || seq_format == PROFILE) { msa_forward_multi(hmmp, msa_seq_infop, use_lead_columns, use_gap_shares, use_prior, &forw_mtx, &forw_scale, use_labels, normalize,scoring_method, multi_scoring_method, aa_freqs, aa_freqs_2, aa_freqs_3, aa_freqs_4); raw_forward_score = (forw_mtx + get_mtx_index(seq_len+1, hmmp->nr_v-1, hmmp->nr_v))->prob; forward_score = log10((forw_mtx + get_mtx_index(seq_len+1, hmmp->nr_v-1, hmmp->nr_v))->prob); for (j = seq_len; j > 0; j--) { forward_score = forward_score + log10(*(forw_scale + j)); } } } /* if needed run backward */ if(run_backward == YES) { if(seq_format == STANDARD || seq_format == FASTA) { backward_multi(hmmp, seq_info.seqs->seq_1, seq_info.seqs->seq_2, seq_info.seqs->seq_3, seq_info.seqs->seq_4, &backw_mtx, forw_scale, use_labels, multi_scoring_method); backward_score = log10((backw_mtx + get_mtx_index(0, 0, hmmp->nr_v))->prob); for (j = seq_len; j > 0; j--) { backward_score = backward_score + log10(*(forw_scale + j)); } } else if(seq_format == MSA_STANDARD || seq_format == PROFILE) { msa_backward_multi(hmmp, msa_seq_infop, use_lead_columns, use_gap_shares, &backw_mtx, forw_scale, use_labels, normalize, scoring_method, multi_scoring_method, aa_freqs, aa_freqs_2, aa_freqs_3, aa_freqs_4); backward_score = log10((backw_mtx + get_mtx_index(0, 0, hmmp->nr_v))->prob); for (j = seq_len; j > 0; j--) { backward_score = backward_score + log10(*(forw_scale + j)); } } } /* if needed run viterbi */ if(run_viterbi == YES || print_path == YES) { if(seq_format == STANDARD || seq_format == FASTA) { viterbi_multi(hmmp, seq_info.seqs->seq_1, seq_info.seqs->seq_2, seq_info.seqs->seq_3, seq_info.seqs->seq_4, &viterbi_mtx, use_labels, multi_scoring_method); viterbi_score = (viterbi_mtx + get_mtx_index(seq_len+1, hmmp->nr_v-1, hmmp->nr_v))->prob; if(viterbi_score == DEFAULT) { viterbi_score = log10(0.0); } } else if(seq_format == MSA_STANDARD || seq_format == PROFILE) { msa_viterbi_multi(hmmp, msa_seq_infop, use_lead_columns, use_gap_shares, use_prior, &viterbi_mtx, use_labels, normalize, scoring_method, multi_scoring_method, aa_freqs, aa_freqs_2, aa_freqs_3, aa_freqs_4); viterbi_score = (viterbi_mtx + get_mtx_index(seq_len+1, hmmp->nr_v-1, hmmp->nr_v))->prob; if(viterbi_score == DEFAULT) { viterbi_score = log10(0.0); } } } /* if needed run n-best */ if(run_n_best == YES) { labels = (char*)malloc_or_die((seq_len + 1) * sizeof(char)); if(seq_format == STANDARD || seq_format == FASTA) { one_best_multi(hmmp, seq_info.seqs->seq_1, seq_info.seqs->seq_2, seq_info.seqs->seq_3, seq_info.seqs->seq_4, &one_best_mtx, &scaling_factors, use_labels, labels, multi_scoring_method); one_best_score = log10((one_best_mtx + get_mtx_index(seq_len+1, hmmp->nr_v-1, hmmp->nr_v))->prob); for (j = seq_len; j > 0; j--) { one_best_score = one_best_score + log10(*(scaling_factors + j)); } } else if(seq_format == MSA_STANDARD || seq_format == PROFILE) { msa_one_best_multi(hmmp, msa_seq_infop, use_lead_columns, use_gap_shares, use_prior, &one_best_mtx, &scaling_factors, use_labels, labels, normalize, scoring_method, multi_scoring_method, aa_freqs, aa_freqs_2, aa_freqs_3, aa_freqs_4); one_best_score = log10((one_best_mtx + get_mtx_index(seq_len+1, hmmp->nr_v-1, hmmp->nr_v))->prob); for (j = seq_len; j > 0; j--) { one_best_score = one_best_score + log10(*(scaling_factors + j)); } } } /* get log likelihod score + print + deallocate */ if(print_log_like == YES) { if(run_viterbi == YES) { norm_log_like = 0.0 - (viterbi_score/(double)(seq_len)); } else if(run_forward == YES) { norm_log_like = 0.0 - (forward_score/(double)(seq_len)); } else { } /* print to outfile */ fprintf(outfile, "Normalized log likelihood: %.4f\n", norm_log_like); } /* get log odds score + print + deallocate */ if(print_log_odds == YES) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -