⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 opt_prfhmm.c

📁 马尔科夫模型的java版本实现
💻 C
📖 第 1 页 / 共 2 页
字号:
#include <stdio.h>#include <string.h>#include <stdlib.h>#include <math.h>#include <float.h>#include "structs.h"#include "funcs.h"#define NEW 1#define ACID 2#define GAP 3#define INSERT_ACID 4#define INSERT_GAP 5#define FAST_MATCH_LIMIT 0.4 /* gap share must be lower than this number */#define SI_PROB 0.99#define EI_PROB 0.99//#define DEBUG_PRIextern int verbose;void set_trans_probs(FILE *seq_infile, int local);void label_msa_fast();void label_msa_query(FILE *seq_infile);void henikoff_weighting(struct msa_sequences_multi_s *msa_seq_infop, FILE *seq_infile, struct hmm_multi_s *hmmp);void printhelp();struct hmm_multi_s hmm;struct msa_sequences_multi_s msa_seq_info;struct replacement_letter_multi_s replacement_letters;struct emission_dirichlet_s em_di;struct emission_dirichlet_s em_di_2;struct emission_dirichlet_s em_di_3;struct emission_dirichlet_s em_di_4;double *subst_mtxp;double *subst_mtxp_2;double *subst_mtxp_3;double *subst_mtxp_4;int main(int argc, char* argv[]){  FILE *hmm_infile = NULL;  FILE *hmm_outfile = NULL;  FILE *seq_infile = NULL;  FILE *flat_seq_infile = NULL;  FILE *replfile = NULL;  FILE *priorfile = NULL;  FILE *priorfile_2 = NULL;  FILE *priorfile_3 = NULL;  FILE *priorfile_4 = NULL;  FILE *substmtxfile = NULL;  FILE *outfile_tmp = NULL;  FILE *freqfile = NULL;  FILE *freqfile_2 = NULL;  FILE *freqfile_3 = NULL;  FILE *freqfile_4 = NULL;    int i = 0;  int use_gap_shares;  char seq_name[200];  int normalize;  int scoring_method;  int read_subst_mtx;  int multi_scoring_method;  int nr_alphabets;  int pre_labeled;  int model_maker;  int local;  int weighting_scheme;  double *aa_freqs;  double *aa_freqs_2;  double *aa_freqs_3;  double *aa_freqs_4;  multi_scoring_method = JOINT_PROB;  read_subst_mtx = NO;  normalize = NO;  scoring_method = SJOLANDER;  nr_alphabets = 0;  pre_labeled = NO;  model_maker = FAST;  local = NO;  weighting_scheme = NONE;  aa_freqs = NULL;  aa_freqs_2 = NULL;  aa_freqs_3 = NULL;  aa_freqs_4 = NULL;  if(argc > 1) {    for(i = 1; i < argc; i++) {      if(strcmp(argv[i], "-la") == 0) {	i++;	if((seq_infile = fopen(argv[i], "r")) == NULL) {	  perror(argv[i]);	  printhelp();	  exit(0);	}	else {	  pre_labeled = YES;	  printf("Opened file %s for reading msa sequence\n",argv[i]);	}      }      if(strcmp(argv[i], "-std") == 0) {	i++;	if((seq_infile = fopen(argv[i], "r")) == NULL) {	  perror(argv[i]);	  printhelp();	  exit(0);	}	else {	  printf("Opened file %s for reading msa sequence\n",argv[i]);	}      }      if(strcmp(argv[i], "-seq") == 0) {	i++;	if((flat_seq_infile = fopen(argv[i], "r")) == NULL) {	  perror(argv[i]);	  printhelp();	  exit(0);	}	else {	  printf("Opened file %s for reading sequence\n",argv[i]);	}      }      else if(strcmp(argv[i], "-hmm") == 0) {	i++;	if((hmm_infile = fopen(argv[i], "r")) == NULL) {	  perror(argv[i]);	  printhelp();	  exit(0);	}	else {	  printf("Opened file %s for reading hmm\n",argv[i]);	}      }      else if(strcmp(argv[i], "-o") == 0) {	i++;	if((hmm_outfile = fopen(argv[i], "w")) == NULL) {	  perror(argv[i]);	  printhelp();	  exit(0);	}	else {	  printf("Opened file %s for writing\n",argv[i]);	}      }      else if(strcmp(argv[i], "-scr") == 0) {	i++;	if(strcmp(argv[i], "DP") == 0) {	  scoring_method = DOT_PRODUCT;	}	else if(strcmp(argv[i], "GM") == 0) {	  scoring_method = SJOLANDER;	}	else if(strcmp(argv[i], "SMP") == 0) {	  scoring_method = SUBST_MTX_PRODUCT;	}	else if(strcmp(argv[i], "SMDP") == 0) {	  scoring_method = SUBST_MTX_DOT_PRODUCT;	}	else if(strcmp(argv[i], "SMDPP") == 0) {	  scoring_method = SUBST_MTX_DOT_PRODUCT_PRIOR;	}	else {	  printf("Incorrect option: %s\n", argv[i]);	  printhelp();	  exit(0);	}      }      else if(strcmp(argv[i], "-l") == 0) {	i++;	if(strcmp(argv[i], "F") == 0) {	  local = NO;	}	else if(strcmp(argv[i], "T") == 0) {	  local = YES;	}	else {	  printf("Incorrect option: %s\n", argv[i]);	  printhelp();	  exit(0);	}      }      else if(strcmp(argv[i], "-m") == 0) {	i++;	if(strcmp(argv[i], "F") == 0) {	  model_maker = FAST;	}	else if(strcmp(argv[i], "Q") == 0) {	  model_maker = QUERY;	}	else {	  printf("Incorrect option: %s\n", argv[i]);	  printhelp();	  exit(0);	}      }      else if(strcmp(argv[i], "-w") == 0) {	i++;	if(strcmp(argv[i], "H") == 0) {	  weighting_scheme = HENIKOFF;	}	else {	  printf("Incorrect option: %s\n", argv[i]);	  printhelp();	  exit(0);	}      }      else if(strcmp(argv[i], "-smx") == 0) {	i++;	if((substmtxfile = fopen(argv[i], "r")) == NULL) {	  printhelp();	  exit(0);	}	else {	  printf("Opened file %s for reading substitution matrix\n",argv[i]);	  read_subst_mtx = YES;	}      }      else if(strcmp(argv[i], "-pri") == 0) {	i++;	if((priorfile = fopen(argv[i], "r")) == NULL) {	  perror(argv[i]);	  printhelp();	  exit(0);	}	else {	  printf("Opened file %s for reading priorfile\n",argv[i]);	}      }      else if(strcmp(argv[i], "-pri2") == 0) {	i++;	if((priorfile_2 = fopen(argv[i], "r")) == NULL) {	  perror(argv[i]);	  printhelp();	  exit(0);	}	else {	  printf("Opened file %s for reading priorfile 2\n",argv[i]);	}      }      else if(strcmp(argv[i], "-pri3") == 0) {	i++;	if((priorfile_3 = fopen(argv[i], "r")) == NULL) {	  perror(argv[i]);	  printhelp();	  exit(0);	}	else {	  printf("Opened file %s for reading priorfile 3\n",argv[i]);	}      }      else if(strcmp(argv[i], "-pri4") == 0) {	i++;	if((priorfile_4 = fopen(argv[i], "r")) == NULL) {	  perror(argv[i]);	  printhelp();	  exit(0);	}	else {	  printf("Opened file %s for reading priorfile 4\n",argv[i]);	}      }      else if(strcmp(argv[i], "-freq1") == 0) {	i++;	if((freqfile = fopen(argv[i], "r")) == NULL) {	  printhelp();	  exit(0);	}	else {	  printf("Opened file %s for reading prior frequencies\n",argv[i]);	}      }      else if(strcmp(argv[i], "-freq2") == 0) {	i++;	if((freqfile_2 = fopen(argv[i], "r")) == NULL) {	  printhelp();	  exit(0);	}	else {	  printf("Opened file %s for reading prior frequencies\n",argv[i]);	}      }      else if(strcmp(argv[i], "-freq3") == 0) {	i++;	if((freqfile_3 = fopen(argv[i], "r")) == NULL) {	  printhelp();	  exit(0);	}	else {	  printf("Opened file %s for reading prior frequencies\n",argv[i]);	}      }      else if(strcmp(argv[i], "-freq4") == 0) {	i++;	if((freqfile_4 = fopen(argv[i], "r")) == NULL) {	  printhelp();	  exit(0);	}	else {	  printf("Opened file %s for reading prior frequencies\n",argv[i]);	}      }      else if(strcmp(argv[i], "-r") == 0) {	i++;	if((replfile = fopen(argv[i], "r")) == NULL) {	  	  perror(argv[i]);	  printhelp();	  exit(0);	}	else {	  printf("Opened file %s for reading replacement letters\n",argv[i]);	}      }      else if(strcmp(argv[i], "-verbose") == 0) {	verbose = YES;      }      else if(strcmp(argv[i], "-h") == 0) {	printhelp();	exit(0);      }    }    /* read subst mtx */    if(read_subst_mtx == YES) {      if(read_subst_matrix_multi(&subst_mtxp, &subst_mtxp_2, &subst_mtxp_3, &subst_mtxp_4, substmtxfile) == NO) {	printhelp();	exit(0);      }    }    /* get frequency file */    if(freqfile != NULL) {      read_frequencies(freqfile, &aa_freqs);    }    /* get frequency file */    if(freqfile_2 != NULL) {      read_frequencies(freqfile_2, &aa_freqs_2);    }        /* get frequency file */    if(freqfile_3 != NULL) {      read_frequencies(freqfile_3, &aa_freqs_3);    }        /* get frequency file */    if(freqfile_4 != NULL) {      read_frequencies(freqfile_4, &aa_freqs_4);    }            if((scoring_method == SUBST_MTX_PRODUCT || scoring_method == SUBST_MTX_DOT_PRODUCT ||	scoring_method == SUBST_MTX_DOT_PRODUCT_PRIOR)       && read_subst_mtx == NO) {      printf("Error: No substitution matrix supplied\n");      printhelp();      exit(0);    }    /* get hmm from file */    if(hmm_infile != NULL) {      nr_alphabets = get_nr_alphabets(hmm_infile);      rewind(hmm_infile);      if(nr_alphabets == 11) {	if((outfile_tmp = fopen("tmp", "w")) == NULL) {	  perror("tmp");	}	transform_singlehmmfile_to_multi(hmm_infile, outfile_tmp);	fclose(hmm_infile);	fclose(outfile_tmp);		if((hmm_infile = fopen("tmp", "r")) == NULL) {	  perror("tmp");	}      }      rewind(hmm_infile);      readhmm_multialpha(hmm_infile, &hmm);      hmm.subst_mtx = subst_mtxp;      hmm.subst_mtx_2 = subst_mtxp_2;      hmm.subst_mtx_3 = subst_mtxp_3;      hmm.subst_mtx_4 = subst_mtxp_4;          }    else {      printhelp();      exit(0);    }    /* get replacement letters */    if(replfile != NULL) {      get_replacement_letters_multi(replfile, &replacement_letters);    }    else {      replacement_letters.nr_alphabets = -1;    }    /* get training sequence */    /* Note: memory for the sequences will be allocated in get_sequences method, but     * must be freed here */    get_sequences_msa_std_multi(seq_infile, NULL, NULL, NULL, NULL, &msa_seq_info, &hmm, -1, &replacement_letters);    /* get labels */    if(pre_labeled == YES) {      get_msa_labels_all_columns_multi(seq_infile, &msa_seq_info, &hmm);    }    else if(model_maker == FAST) {      label_msa_fast();    }    else if(model_maker == QUERY) {      label_msa_query(seq_infile);    }    /* read priorfiles */    if(priorfile != NULL) {      read_prior_file_multi(&em_di, &hmm, priorfile, 1);    }    if(priorfile_2 != NULL) {      read_prior_file_multi(&em_di_2, &hmm, priorfile_2, 2);    }    if(priorfile_3 != NULL) {      read_prior_file_multi(&em_di_3, &hmm, priorfile_3, 3);    }    if(priorfile_4 != NULL) {      read_prior_file_multi(&em_di_4, &hmm, priorfile_4, 4);    }        while(1) {       /* adjust sequence weights */      //dump_msa_seqs_multi(&msa_seq_info, &hmm);      if(weighting_scheme == HENIKOFF) {	henikoff_weighting(&msa_seq_info, flat_seq_infile, &hmm);      }      //dump_msa_seqs_multi(&msa_seq_info, &hmm);      /* do initial match state training */      msa_baum_welch_dirichlet_multi(&hmm, &msa_seq_info, 1, NO, use_gap_shares, NO, YES,				     YES, NO, normalize, scoring_method,				     YES, multi_scoring_method, aa_freqs, aa_freqs_2, aa_freqs_3, aa_freqs_4, YES);      /* set insert emission and transition parameters */      rewind(seq_infile);      set_trans_probs(seq_infile, local);      break;    }    if(hmm_outfile != NULL) {       savehmm_multialpha(hmm_outfile, &hmm);    }    else {      savehmm_multialpha(fopen("a.hmg", "w"), &hmm);    }  }    /* cleanup */  if(priorfile != NULL) {    free(em_di.q_values);    free(em_di.alpha_sums);    free(em_di.logbeta_values);    free(em_di.prior_values);    fclose(priorfile);  }    if(freqfile != NULL) {    free(aa_freqs);    fclose(freqfile);  }  if(freqfile_2 != NULL) {    free(aa_freqs_2);    fclose(freqfile_2);  }  if(freqfile_3 != NULL) {    free(aa_freqs_3);    fclose(freqfile_3);  }  if(freqfile_4 != NULL) {    free(aa_freqs_4);    fclose(freqfile_4);  }  }void set_trans_probs(FILE *seq_infile, int local){  char cur;  int nr_mtx_columns, nr_insert_columns;  int *transitions;  double *insert_emissions;  //double *insert_acids;  int i,j;  int a_index, is_gap, new;  int cur_msa_column, cur_mtx_column, cur_mtx_col_nr, cur_insert_column;  int last_sign, cur_sign;  int d_tot, m_tot, i_tot, s_tot, dd, dm, mm, md, mi, ii, im, sd, sm;  int use_insert_matrix;  double transprob_ii;  char *row;  int INSERT_SIZE = 100;  int tot_nr_chars;    int alphabet_done;  int cur_alphabet;  double local_me_prob;  double local_im_prob;  double tot_local_im_prob;    local_me_prob = 0.0;  local_im_prob = 0.0;  rewind(seq_infile);    use_insert_matrix = YES;  /* count nr of M and I labels in alignment */    nr_mtx_columns = 0;  nr_insert_columns = 0;  tot_nr_chars = 22;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -