📄 hmmsearch.c
字号:
/* use default null model */ emiss_prob = 1.0 / (double)hmm.a_size; if(hmm.nr_alphabets > 1) { emiss_prob_2 = 1.0 / (double)hmm.a_size_2; } if(hmm.nr_alphabets > 2) { emiss_prob_3 = 1.0 / (double)hmm.a_size_3; } if(hmm.nr_alphabets > 3) { emiss_prob_4 = 1.0 / (double)hmm.a_size_4; } trans_prob = (double)(seq_len)/(double)(seq_len + 1); null_model.a_size = hmm.a_size; null_model.emissions = (double*)malloc_or_die(null_model.a_size * sizeof(double)); if(hmm.nr_alphabets > 1) { null_model.a_size_2 = hmm.a_size_2; null_model.emissions_2 = (double*)malloc_or_die(null_model.a_size_2 * sizeof(double)); } if(hmm.nr_alphabets > 2) { null_model.a_size_3 = hmm.a_size_3; null_model.emissions_3 = (double*)malloc_or_die(null_model.a_size_3 * sizeof(double)); } if(hmm.nr_alphabets > 3) { null_model.a_size_4 = hmm.a_size_4; null_model.emissions_4 = (double*)malloc_or_die(null_model.a_size_4 * sizeof(double)); } for(k = 0; k < hmm.a_size; k++) { null_model.emissions[k] = emiss_prob; if(hmm.nr_alphabets > 1) { null_model.emissions_2[k] = emiss_prob_2; } if(hmm.nr_alphabets > 2) { null_model.emissions_3[k] = emiss_prob_3; } if(hmm.nr_alphabets > 3) { null_model.emissions_4[k] = emiss_prob_4; } } null_model.trans_prob = trans_prob; using_default_null_model = YES; } /* NOTE: must include other scoring methods here as well (copy from core_algorithms) */ /* use specified null model */ null_model_score = 0.0; if(scoring_method == DOT_PRODUCT) { l = 0; while(1) { /* set index for seq pointer */ if(prf_mode == ALL_SEQS) { p = l; } else { p = *(msa_seq_infop->lead_columns_start + l); } /* get col_score for the different alphabets */ col_score = 0.0; col_score_2 = 0.0; col_score_3 = 0.0; col_score_4 = 0.0; col_score = get_dp_statescore(null_model.a_size, use_gap_shares, use_prior, msa_seq_infop->msa_seq_1, p, null_model.emissions, 0, normalize, msa_seq_infop->gap_shares); if(hmm.nr_alphabets > 1) { col_score_2 = get_dp_statescore(null_model.a_size_2, use_gap_shares, use_prior, msa_seq_infop->msa_seq_2, p, null_model.emissions_2, 0, normalize, msa_seq_infop->gap_shares); } if(hmm.nr_alphabets > 2) { col_score_3 = get_dp_statescore(null_model.a_size_3, use_gap_shares, use_prior, msa_seq_infop->msa_seq_3, p, null_model.emissions_3, 0, normalize, msa_seq_infop->gap_shares); } if(hmm.nr_alphabets > 3) { col_score_4 = get_dp_statescore(null_model.a_size_4, use_gap_shares, use_prior, msa_seq_infop->msa_seq_4, p, null_model.emissions_4, 0, normalize, msa_seq_infop->gap_shares); } /* calculate total column score */ if(multi_scoring_method == JOINT_PROB) { null_model_score += log10(col_score) + log10(null_model.trans_prob); if(hmm.nr_alphabets > 1) { null_model_score += log10(col_score_2); } if(hmm.nr_alphabets > 2) { null_model_score += log10(col_score_3); } if(hmm.nr_alphabets > 3) { null_model_score += log10(col_score_4); } } else { /* use other multialpha scoring method, not implemented yet */ printf("Error: only JOINT_PROB scoring is implemented\n"); exit(0); } /* update seq index and check if we are done */ l++; if(prf_mode == ALL_SEQS) { if(l >= seq_len) { break; } } else { if(*(msa_seq_infop->lead_columns_start + l) == END) { break; } } } } if(scoring_method == DOT_PRODUCT_PICASSO) { l = 0; while(1) { /* set index for seq pointer */ if(prf_mode == ALL_SEQS) { p = l; } else { p = *(msa_seq_infop->lead_columns_start + l); } /* get col_score for the different alphabets */ col_score = 0.0; col_score_2 = 0.0; col_score_3 = 0.0; col_score_4 = 0.0; col_score = get_dp_picasso_statescore(null_model.a_size, use_gap_shares, use_prior, msa_seq_infop->msa_seq_1, p, null_model.emissions, 0, normalize, msa_seq_infop->gap_shares, aa_freqs); if(hmm.nr_alphabets > 1) { col_score_2 = get_dp_picasso_statescore(null_model.a_size_2, use_gap_shares, use_prior, msa_seq_infop->msa_seq_2, p, null_model.emissions_2, 0, normalize, msa_seq_infop->gap_shares, aa_freqs_2); } if(hmm.nr_alphabets > 2) { col_score_3 = get_dp_picasso_statescore(null_model.a_size_3, use_gap_shares, use_prior, msa_seq_infop->msa_seq_3, p, null_model.emissions_3, 0, normalize, msa_seq_infop->gap_shares, aa_freqs_3); } if(hmm.nr_alphabets > 3) { col_score_4 = get_dp_picasso_statescore(null_model.a_size_4, use_gap_shares, use_prior, msa_seq_infop->msa_seq_4, p, null_model.emissions_4, 0, normalize, msa_seq_infop->gap_shares, aa_freqs_4); } /* calculate total column score */ if(multi_scoring_method == JOINT_PROB) { null_model_score += log10(col_score) + log10(null_model.trans_prob); if(hmm.nr_alphabets > 1) { null_model_score += log10(col_score_2); } if(hmm.nr_alphabets > 2) { null_model_score += log10(col_score_3); } if(hmm.nr_alphabets > 3) { null_model_score += log10(col_score_4); } } else { /* use other multialpha scoring method, not implemented yet */ printf("Error: only JOINT_PROB scoring is implemented\n"); exit(0); } /* update seq index and check if we are done */ l++; if(prf_mode == ALL_SEQS) { if(l >= seq_len) { break; } } else { if(*(msa_seq_infop->lead_columns_start + l) == END) { break; } } } } if(scoring_method == PICASSO) { l = 0; while(1) { /* set index for seq pointer */ if(prf_mode == ALL_SEQS) { p = l; } else { p = *(msa_seq_infop->lead_columns_start + l); } /* get col_score for the different alphabets */ col_score = 0.0; col_score_2 = 0.0; col_score_3 = 0.0; col_score_4 = 0.0; col_score = get_picasso_statescore(null_model.a_size, use_gap_shares, use_prior, msa_seq_infop->msa_seq_1, p, null_model.emissions, 0, normalize, msa_seq_infop->gap_shares, aa_freqs); if(hmm.nr_alphabets > 1) { col_score_2 = get_picasso_statescore(null_model.a_size_2, use_gap_shares, use_prior, msa_seq_infop->msa_seq_2, p, null_model.emissions_2, 0, normalize, msa_seq_infop->gap_shares, aa_freqs_2); } if(hmm.nr_alphabets > 2) { col_score_3 = get_picasso_statescore(null_model.a_size_3, use_gap_shares, use_prior, msa_seq_infop->msa_seq_3, p, null_model.emissions_3, 0, normalize, msa_seq_infop->gap_shares, aa_freqs_3); } if(hmm.nr_alphabets > 3) { col_score_4 = get_picasso_statescore(null_model.a_size_4, use_gap_shares, use_prior, msa_seq_infop->msa_seq_4, p, null_model.emissions_4, 0, normalize, msa_seq_infop->gap_shares, aa_freqs_4); } /* calculate total column score */ if(multi_scoring_method == JOINT_PROB) { null_model_score += log10(col_score) + log10(null_model.trans_prob); if(hmm.nr_alphabets > 1) { null_model_score += log10(col_score_2); } if(hmm.nr_alphabets > 2) { null_model_score += log10(col_score_3); } if(hmm.nr_alphabets > 3) { null_model_score += log10(col_score_4); } } else { /* use other multialpha scoring method, not implemented yet */ printf("Error: only JOINT_PROB scoring is implemented\n"); exit(0); } /* update seq index and check if we are done */ l++; if(prf_mode == ALL_SEQS) { if(l >= seq_len) { break; } } else { if(*(msa_seq_infop->lead_columns_start + l) == END) { break; } } } } if(scoring_method == PICASSO_SYM) { l = 0; while(1) { /* set index for seq pointer */ if(prf_mode == ALL_SEQS) { p = l; } else { p = *(msa_seq_infop->lead_columns_start + l); } /* get col_score for the different alphabets */ col_score = 0.0; col_score_2 = 0.0; col_score_3 = 0.0; col_score_4 = 0.0; col_score = get_picasso_sym_statescore(null_model.a_size, use_gap_shares, use_prior, msa_seq_infop->msa_seq_1, p, null_model.emissions, 0, normalize, msa_seq_infop->gap_shares, aa_freqs); if(hmm.nr_alphabets > 1) { col_score_2 = get_picasso_sym_statescore(null_model.a_size_2, use_gap_shares, use_prior, msa_seq_infop->msa_seq_2, p, null_model.emissions_2, 0, normalize, msa_seq_infop->gap_shares, aa_freqs_2); } if(hmm.nr_alphabets > 2) { col_score_3 = get_picasso_sym_statescore(null_model.a_size_3, use_gap_shares, use_prior, msa_seq_infop->msa_seq_3, p, null_model.emissions_3, 0, normalize, msa_seq_infop->gap_shares, aa_freqs_3); } if(hmm.nr_alphabets > 3) { col_score_4 = get_picasso_sym_statescore(null_model.a_size_4, use_gap_shares, use_prior, msa_seq_infop->msa_seq_4, p, null_model.emissions_4, 0, normalize, msa_seq_infop->gap_shares, aa_freqs_4); } /* calculate total column score */ if(multi_scoring_method == JOINT_PROB) { null_model_score += log10(col_score) + log10(null_model.trans_prob); if(hmm.nr_alphabets > 1) { null_model_score += log10(col_score_2); } if(hmm.nr_alphabets > 2) { null_model_score += log10(col_score_3); } if(hmm.nr_alphabets > 3) { null_model_score += log10(col_score_4); } } else { /* use other multialpha scoring method, not implemented yet */ printf("Error: only JOINT_PROB scoring is implemented\n"); exit(0); } /* update seq index and check if we are done */ l++; if(prf_mode == ALL_SEQS) { if(l >= seq_len) { break; } } else { if(*(msa_seq_infop->lead_columns_start + l) == END) { break; } } } } if(scoring_method == SJOLANDER) { l = 0; while(1) { /* set index for seq pointer */ if(prf_mode == ALL_SEQS) { p = l; } else { p = *(msa_seq_infop->lead_columns_start + l); } /* get col_score for the different alphabets */ col_score = 0.0; col_score_2 = 0.0; col_score_3 = 0.0; col_score_4 = 0.0; col_score = get_sjolander_statescore(null_model.a_size, use_gap_shares, use_prior, msa_seq_infop->msa_seq_1, p, null_model.emissions, 0, normalize, msa_seq_infop->gap_shares); if(hmm.nr_alphabets > 1) { col_score_2 = get_sjolander_statescore(null_model.a_size_2, use_gap_shares, use_prior, msa_seq_infop->msa_seq_2, p, null_model.emissions_2, 0, normalize, msa_seq_infop->gap_shares); } if(hmm.nr_alphabets > 2) { col_score_3 = get_sjolander_statescore(null_model.a_size_3, use_gap_shares, use_prior, msa_seq_infop->msa_seq_3, p, null_model.emissions_3, 0, normalize, msa_seq_infop->gap_shares); } if(hmm.nr_alphabets > 3) { col_score_4 = get_sjolander_statescore(null_model.a_size_4, use_gap_shares, use_prior, msa_seq_infop->msa_seq_4, p, null_model.emissions_4, 0, normalize, msa_seq_infop->gap_shares); } /* calculate total column score */ if(multi_scoring_method == JOINT_PROB) { null_model_score += log10(col_score) + log10(null_model.trans_prob); if(hmm.nr_alphabets > 1) { null_model_score += log10(col_score_2); } if(hmm.nr_alphabets > 2) { null_model_score += log10(col_score_3); } if(hmm.nr_alphabets > 3) { null_model_score += log10(col_score_4); } } else { /* use other multialpha scoring method, not implemented yet */ printf("Error: only JOINT_PROB scoring is implemented\n"); exit(0); } /* update seq index and check if we are done */ l++; if(prf_mode == ALL_SEQS) { if(l >= seq_len) { break; } } else { if(*(msa_seq_infop->lead_columns_start + l) == END) { break; } } } } if(scoring_method == SJOLANDER_REVERSED) { l = 0; while(1) { /* set index for seq pointer */ if(prf_mode == ALL_SEQS) { p = l; } else { p = *(msa_seq_infop->lead_columns_start + l); } /* get col_score for the different alphabets */ col_score = 0.0; col_score_2 = 0.0; col_score_3 = 0.0; col_score_4 = 0.0; col_score = get_sjolander_reversed_statescore(null_model.a_size, use_gap_shares, use_prior, msa_seq_infop->msa_seq_1, p, null_model.emissions, 0, normalize, msa_seq_infop->gap_shares); if(hmm.nr_alphabets > 1) { col_score_2 = get_sjolander_reversed_statescore(null_model.a_size_2, use_gap_shares, use_prior, msa_seq_infop->msa_seq_2, p, null_model.emissions_2, 0, normalize, msa_seq_infop->gap_shares); } if(hmm.nr_alphabets > 2) { col_score_3 = get_sjolander_reversed_statescore(null_model.a_size_3, use_gap_shares, use_prior, msa_seq_infop->msa_seq_3, p, null_model.emissions_3, 0, normalize, msa_seq_infop->gap_shares); } if(hmm.nr_alphabets > 3) { col_score_4 = get_sjolander_reversed_statescore(null_model.a_size_4, use_gap_shares, use_prior, msa_seq_infop->msa_seq_4, p, null_model.emissions_4, 0, normalize, msa_seq_infop->gap_shares); } /* calculate total column score */ if(multi_scoring_method == JOINT_PROB) { null_model_score += log10(col_score) + log10(null_model.trans_prob); if(hmm.nr_alphabets > 1) { null_model_
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -