📄 hmmsearch.c

📁 马尔科夫模型的java版本实现
💻 C
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 45
    /* use default null model */    emiss_prob = 1.0 / (double)hmm.a_size;    if(hmm.nr_alphabets > 1) {      emiss_prob_2 = 1.0 / (double)hmm.a_size_2;    }    if(hmm.nr_alphabets > 2) {      emiss_prob_3 = 1.0 / (double)hmm.a_size_3;    }    if(hmm.nr_alphabets > 3) {      emiss_prob_4 = 1.0 / (double)hmm.a_size_4;    }    trans_prob = (double)(seq_len)/(double)(seq_len + 1);    null_model.a_size = hmm.a_size;    null_model.emissions = (double*)malloc_or_die(null_model.a_size * sizeof(double));    if(hmm.nr_alphabets > 1) {      null_model.a_size_2 = hmm.a_size_2;      null_model.emissions_2 = (double*)malloc_or_die(null_model.a_size_2 * sizeof(double));    }    if(hmm.nr_alphabets > 2) {      null_model.a_size_3 = hmm.a_size_3;      null_model.emissions_3 = (double*)malloc_or_die(null_model.a_size_3 * sizeof(double));    }    if(hmm.nr_alphabets > 3) {      null_model.a_size_4 = hmm.a_size_4;      null_model.emissions_4 = (double*)malloc_or_die(null_model.a_size_4 * sizeof(double));    }    for(k = 0; k < hmm.a_size; k++) {      null_model.emissions[k] = emiss_prob;      if(hmm.nr_alphabets > 1) {	null_model.emissions_2[k] = emiss_prob_2;      }      if(hmm.nr_alphabets > 2) {	null_model.emissions_3[k] = emiss_prob_3;      }      if(hmm.nr_alphabets > 3) {	null_model.emissions_4[k] = emiss_prob_4;      }    }        null_model.trans_prob = trans_prob;    using_default_null_model = YES;  }    /* NOTE: must include other scoring methods here as well (copy from core_algorithms) */  /* use specified null model */  null_model_score = 0.0;  if(scoring_method == DOT_PRODUCT) {    l = 0;    while(1) {      /* set index for seq pointer */      if(prf_mode == ALL_SEQS) {	p = l;      }      else {	p = *(msa_seq_infop->lead_columns_start + l);      }      /* get col_score for the different alphabets */      col_score = 0.0;      col_score_2 = 0.0;      col_score_3 = 0.0;      col_score_4 = 0.0;      col_score = get_dp_statescore(null_model.a_size, use_gap_shares, use_prior, msa_seq_infop->msa_seq_1,				    p, null_model.emissions,				    0, normalize, msa_seq_infop->gap_shares);      if(hmm.nr_alphabets > 1) {	col_score_2 = get_dp_statescore(null_model.a_size_2, use_gap_shares, use_prior, msa_seq_infop->msa_seq_2,					p, null_model.emissions_2,					0, normalize, msa_seq_infop->gap_shares);      }      if(hmm.nr_alphabets > 2) {	col_score_3 = get_dp_statescore(null_model.a_size_3, use_gap_shares, use_prior, msa_seq_infop->msa_seq_3,					p, null_model.emissions_3,					0, normalize, msa_seq_infop->gap_shares);      }      if(hmm.nr_alphabets > 3) {	col_score_4 = get_dp_statescore(null_model.a_size_4, use_gap_shares, use_prior, msa_seq_infop->msa_seq_4,					p, null_model.emissions_4,					0, normalize, msa_seq_infop->gap_shares);      }            /* calculate total column score */      if(multi_scoring_method == JOINT_PROB) {	null_model_score += log10(col_score) + log10(null_model.trans_prob);	if(hmm.nr_alphabets > 1) {	  null_model_score += log10(col_score_2);	}	if(hmm.nr_alphabets > 2) {	    null_model_score += log10(col_score_3);	}	if(hmm.nr_alphabets > 3) {	  null_model_score += log10(col_score_4);	}      }      else {	/* use other multialpha scoring method, not implemented yet */	printf("Error: only JOINT_PROB scoring is implemented\n");	exit(0);      }      /* update seq index and check if we are done */       l++;      if(prf_mode == ALL_SEQS) {	if(l >= seq_len) {	  break;	}      }      else {	if(*(msa_seq_infop->lead_columns_start + l) == END) {	  break;	}      }    }  }  if(scoring_method == DOT_PRODUCT_PICASSO) {    l = 0;    while(1) {      /* set index for seq pointer */      if(prf_mode == ALL_SEQS) {	p = l;      }      else {	p = *(msa_seq_infop->lead_columns_start + l);      }      /* get col_score for the different alphabets */      col_score = 0.0;      col_score_2 = 0.0;      col_score_3 = 0.0;      col_score_4 = 0.0;      col_score = get_dp_picasso_statescore(null_model.a_size, use_gap_shares, use_prior, msa_seq_infop->msa_seq_1,				    p, null_model.emissions,				    0, normalize, msa_seq_infop->gap_shares, aa_freqs);      if(hmm.nr_alphabets > 1) {	col_score_2 = get_dp_picasso_statescore(null_model.a_size_2, use_gap_shares, use_prior, msa_seq_infop->msa_seq_2,					p, null_model.emissions_2,					0, normalize, msa_seq_infop->gap_shares, aa_freqs_2);      }      if(hmm.nr_alphabets > 2) {	col_score_3 = get_dp_picasso_statescore(null_model.a_size_3, use_gap_shares, use_prior, msa_seq_infop->msa_seq_3,					p, null_model.emissions_3,					0, normalize, msa_seq_infop->gap_shares, aa_freqs_3);      }      if(hmm.nr_alphabets > 3) {	col_score_4 = get_dp_picasso_statescore(null_model.a_size_4, use_gap_shares, use_prior, msa_seq_infop->msa_seq_4,					p, null_model.emissions_4,					0, normalize, msa_seq_infop->gap_shares, aa_freqs_4);      }            /* calculate total column score */      if(multi_scoring_method == JOINT_PROB) {	null_model_score += log10(col_score) + log10(null_model.trans_prob);	if(hmm.nr_alphabets > 1) {	  null_model_score += log10(col_score_2);	}	if(hmm.nr_alphabets > 2) {	    null_model_score += log10(col_score_3);	}	if(hmm.nr_alphabets > 3) {	  null_model_score += log10(col_score_4);	}      }      else {	/* use other multialpha scoring method, not implemented yet */	printf("Error: only JOINT_PROB scoring is implemented\n");	exit(0);      }      /* update seq index and check if we are done */       l++;      if(prf_mode == ALL_SEQS) {	if(l >= seq_len) {	  break;	}      }      else {	if(*(msa_seq_infop->lead_columns_start + l) == END) {	  break;	}      }    }  }  if(scoring_method == PICASSO) {    l = 0;    while(1) {      /* set index for seq pointer */      if(prf_mode == ALL_SEQS) {	p = l;      }      else {	p = *(msa_seq_infop->lead_columns_start + l);      }      /* get col_score for the different alphabets */      col_score = 0.0;      col_score_2 = 0.0;      col_score_3 = 0.0;      col_score_4 = 0.0;      col_score = get_picasso_statescore(null_model.a_size, use_gap_shares, use_prior, msa_seq_infop->msa_seq_1,				    p, null_model.emissions,				    0, normalize, msa_seq_infop->gap_shares, aa_freqs);      if(hmm.nr_alphabets > 1) {	col_score_2 = get_picasso_statescore(null_model.a_size_2, use_gap_shares, use_prior, msa_seq_infop->msa_seq_2,					p, null_model.emissions_2,					0, normalize, msa_seq_infop->gap_shares, aa_freqs_2);      }      if(hmm.nr_alphabets > 2) {	col_score_3 = get_picasso_statescore(null_model.a_size_3, use_gap_shares, use_prior, msa_seq_infop->msa_seq_3,					p, null_model.emissions_3,					0, normalize, msa_seq_infop->gap_shares, aa_freqs_3);      }      if(hmm.nr_alphabets > 3) {	col_score_4 = get_picasso_statescore(null_model.a_size_4, use_gap_shares, use_prior, msa_seq_infop->msa_seq_4,					p, null_model.emissions_4,					0, normalize, msa_seq_infop->gap_shares, aa_freqs_4);      }            /* calculate total column score */      if(multi_scoring_method == JOINT_PROB) {	null_model_score += log10(col_score) + log10(null_model.trans_prob);	if(hmm.nr_alphabets > 1) {	  null_model_score += log10(col_score_2);	}	if(hmm.nr_alphabets > 2) {	    null_model_score += log10(col_score_3);	}	if(hmm.nr_alphabets > 3) {	  null_model_score += log10(col_score_4);	}      }      else {	/* use other multialpha scoring method, not implemented yet */	printf("Error: only JOINT_PROB scoring is implemented\n");	exit(0);      }      /* update seq index and check if we are done */       l++;      if(prf_mode == ALL_SEQS) {	if(l >= seq_len) {	  break;	}      }      else {	if(*(msa_seq_infop->lead_columns_start + l) == END) {	  break;	}      }    }  }  if(scoring_method == PICASSO_SYM) {    l = 0;    while(1) {      /* set index for seq pointer */      if(prf_mode == ALL_SEQS) {	p = l;      }      else {	p = *(msa_seq_infop->lead_columns_start + l);      }      /* get col_score for the different alphabets */      col_score = 0.0;      col_score_2 = 0.0;      col_score_3 = 0.0;      col_score_4 = 0.0;      col_score = get_picasso_sym_statescore(null_model.a_size, use_gap_shares, use_prior, msa_seq_infop->msa_seq_1,				    p, null_model.emissions,				    0, normalize, msa_seq_infop->gap_shares, aa_freqs);      if(hmm.nr_alphabets > 1) {	col_score_2 = get_picasso_sym_statescore(null_model.a_size_2, use_gap_shares, use_prior, msa_seq_infop->msa_seq_2,					p, null_model.emissions_2,					0, normalize, msa_seq_infop->gap_shares, aa_freqs_2);      }      if(hmm.nr_alphabets > 2) {	col_score_3 = get_picasso_sym_statescore(null_model.a_size_3, use_gap_shares, use_prior, msa_seq_infop->msa_seq_3,					p, null_model.emissions_3,					0, normalize, msa_seq_infop->gap_shares, aa_freqs_3);      }      if(hmm.nr_alphabets > 3) {	col_score_4 = get_picasso_sym_statescore(null_model.a_size_4, use_gap_shares, use_prior, msa_seq_infop->msa_seq_4,					p, null_model.emissions_4,					0, normalize, msa_seq_infop->gap_shares, aa_freqs_4);      }            /* calculate total column score */      if(multi_scoring_method == JOINT_PROB) {	null_model_score += log10(col_score) + log10(null_model.trans_prob);	if(hmm.nr_alphabets > 1) {	  null_model_score += log10(col_score_2);	}	if(hmm.nr_alphabets > 2) {	    null_model_score += log10(col_score_3);	}	if(hmm.nr_alphabets > 3) {	  null_model_score += log10(col_score_4);	}      }      else {	/* use other multialpha scoring method, not implemented yet */	printf("Error: only JOINT_PROB scoring is implemented\n");	exit(0);      }      /* update seq index and check if we are done */       l++;      if(prf_mode == ALL_SEQS) {	if(l >= seq_len) {	  break;	}      }      else {	if(*(msa_seq_infop->lead_columns_start + l) == END) {	  break;	}      }    }  }  if(scoring_method == SJOLANDER) {    l = 0;    while(1) {       /* set index for seq pointer */      if(prf_mode == ALL_SEQS) {	p = l;      }      else {	p = *(msa_seq_infop->lead_columns_start + l);      }      /* get col_score for the different alphabets */      col_score = 0.0;      col_score_2 = 0.0;      col_score_3 = 0.0;      col_score_4 = 0.0;      col_score = get_sjolander_statescore(null_model.a_size, use_gap_shares, use_prior, msa_seq_infop->msa_seq_1,					   p, null_model.emissions,					   0, normalize, msa_seq_infop->gap_shares);      if(hmm.nr_alphabets > 1) {	col_score_2 = get_sjolander_statescore(null_model.a_size_2, use_gap_shares, use_prior, msa_seq_infop->msa_seq_2,						 p, null_model.emissions_2,					       0, normalize, msa_seq_infop->gap_shares);      }      if(hmm.nr_alphabets > 2) {	col_score_3 = get_sjolander_statescore(null_model.a_size_3, use_gap_shares, use_prior, msa_seq_infop->msa_seq_3,					       p, null_model.emissions_3,						 0, normalize, msa_seq_infop->gap_shares);      }      if(hmm.nr_alphabets > 3) {	  col_score_4 = get_sjolander_statescore(null_model.a_size_4, use_gap_shares, use_prior, msa_seq_infop->msa_seq_4,						 p, null_model.emissions_4,						 0, normalize, msa_seq_infop->gap_shares);	}            /* calculate total column score */      if(multi_scoring_method == JOINT_PROB) {	null_model_score += log10(col_score) + log10(null_model.trans_prob);	if(hmm.nr_alphabets > 1) {	  null_model_score += log10(col_score_2);	}	if(hmm.nr_alphabets > 2) {	  null_model_score += log10(col_score_3);	}	  if(hmm.nr_alphabets > 3) {	    null_model_score += log10(col_score_4);	  }      }      else {	/* use other multialpha scoring method, not implemented yet */	printf("Error: only JOINT_PROB scoring is implemented\n");	exit(0);      }      /* update seq index and check if we are done */       l++;      if(prf_mode == ALL_SEQS) {	if(l >= seq_len) {	  break;	}      }      else {	if(*(msa_seq_infop->lead_columns_start + l) == END) {	  break;	}      }    }  }  if(scoring_method == SJOLANDER_REVERSED) {    l = 0;    while(1) {       /* set index for seq pointer */      if(prf_mode == ALL_SEQS) {	p = l;      }      else {	p = *(msa_seq_infop->lead_columns_start + l);      }      /* get col_score for the different alphabets */      col_score = 0.0;      col_score_2 = 0.0;      col_score_3 = 0.0;      col_score_4 = 0.0;      col_score = get_sjolander_reversed_statescore(null_model.a_size, use_gap_shares, use_prior, msa_seq_infop->msa_seq_1,						    p, null_model.emissions,						    0, normalize, msa_seq_infop->gap_shares);      if(hmm.nr_alphabets > 1) {	col_score_2 = get_sjolander_reversed_statescore(null_model.a_size_2, use_gap_shares, use_prior, msa_seq_infop->msa_seq_2,							p, null_model.emissions_2,							0, normalize, msa_seq_infop->gap_shares);      }      if(hmm.nr_alphabets > 2) {	col_score_3 = get_sjolander_reversed_statescore(null_model.a_size_3, use_gap_shares, use_prior, msa_seq_infop->msa_seq_3,							p, null_model.emissions_3,							0, normalize, msa_seq_infop->gap_shares);      }      if(hmm.nr_alphabets > 3) {	col_score_4 = get_sjolander_reversed_statescore(null_model.a_size_4, use_gap_shares, use_prior, msa_seq_infop->msa_seq_4,							p, null_model.emissions_4,							0, normalize, msa_seq_infop->gap_shares);      }            /* calculate total column score */      if(multi_scoring_method == JOINT_PROB) {	null_model_score += log10(col_score) + log10(null_model.trans_prob);	if(hmm.nr_alphabets > 1) {	  null_model_
上一页 1 2 3 45
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -