⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hmmtrain.c

📁 马尔科夫模型的java版本实现
💻 C
📖 第 1 页 / 共 2 页
字号:
#include <math.h>#include <limits.h>#include "structs.h"#include "funcs.h"#include "cmdline_hmmtrain.h"#define MAX_LINE 4000#define MAX_SEQS 4000#define FIRST_SEQ 1extern int verbose;/* memory for transition and emission matrices, from_vertex array and    to_vertex_array will be allocated in readhmm, but must be freed here */static struct hmm_multi_s hmm;static struct msa_sequences_multi_s *msa_seq_infop;static struct sequences_multi_s seq_info;static struct replacement_letter_multi_s replacement_letters;double *subst_mtxp;double *subst_mtxp_2;double *subst_mtxp_3;double *subst_mtxp_4;double *aa_freqs;double *aa_freqs_2;double *aa_freqs_3;double *aa_freqs_4;int main(int argc, char* argv[]){  int i;  FILE *hmmfile, *outfile, *seqfile, *replfile, *seqnamefile, *substmtxfile, *freqfile;  double d;  int seq_format;  int use_gap_shares;  int use_lead_columns;  int lead_seq;  int use_labels;  int annealing;  int use_transition_pseudo_counts, use_emission_pseudo_counts;  int nr_seqs, seq_counter, nr_read_seqs;  char seq_name[200];  int normalize;  int scoring_method;  int read_subst_mtx;  int use_nr_occurences;  int multi_scoring_method;  int training_method;  int hmmfiletype;  int use_prior;   struct gengetopt_args_info args_info;  seq_format = STANDARD;  hmmfile = NULL;  outfile = NULL;  seqfile = NULL;  replfile = NULL;  seqnamefile = NULL;  substmtxfile = NULL;  freqfile = NULL;  use_gap_shares = YES;  use_lead_columns = YES;  lead_seq = FIRST_SEQ;  use_labels = NO;  annealing = NO;  use_transition_pseudo_counts = NO;  use_emission_pseudo_counts = NO;  normalize = NO;  scoring_method = SJOLANDER;  read_subst_mtx = NO;  use_nr_occurences = NO;  multi_scoring_method = JOINT_PROB;  subst_mtxp = NULL;  subst_mtxp_2 = NULL;  subst_mtxp_3 = NULL;  subst_mtxp_4 = NULL;  aa_freqs = NULL;  aa_freqs_2 = NULL;  aa_freqs_3 = NULL;  aa_freqs_4 = NULL;  training_method = BW_STD;  use_prior = YES;    /* parse command line */  if(cmdline_parser(argc, argv, &args_info) != 0) {    exit(1);  }  /* compulsory options */  if(args_info.hmminfile_given) {    if((hmmfile = fopen(args_info.hmminfile_arg, "r")) == NULL) {      perror(args_info.hmminfile_arg);      exit(0);    }    else {      printf("Opened file %s for reading model file\n",args_info.hmminfile_arg);    }  }  if(args_info.seqnamefile_given) {    if((seqnamefile = fopen(args_info.seqnamefile_arg, "r")) == NULL) {      perror(args_info.seqnamefile_arg);      exit(0);    }    else {      printf("Opened file %s for reading sequence names\n",args_info.seqnamefile_arg);    }  }  if(args_info.outfile_given) {    if((outfile = fopen(args_info.outfile_arg, "w")) == NULL) {      perror(args_info.outfile_arg);      exit(0);    }    else {      printf("Opened file %s for writing\n",args_info.outfile_arg);    }  }  if(args_info.seqformat_given) {    if(strcmp(args_info.seqformat_arg, "fa") == 0) {      seq_format = FASTA;     }    else if(strcmp(args_info.seqformat_arg, "s") == 0) {      seq_format = STANDARD;    }    else if(strcmp(args_info.seqformat_arg, "msa") == 0) {      seq_format = MSA_STANDARD;    }    else if(strcmp(args_info.seqformat_arg, "prf") == 0) {      seq_format = PROFILE;    }    else {      printf("Incorrect sequence format: %s\n", args_info.seqformat_arg);      exit(0);    }  }  /* non compulsory options */  if(args_info.smxfile_given) {    if((substmtxfile = fopen(args_info.smxfile_arg, "r")) == NULL) {      perror(args_info.smxfile_arg);      exit(0);    }    else {      read_subst_mtx = YES;      printf("Opened file %s for reading substitution matrix\n",args_info.smxfile_arg);    }  }  if(args_info.freqfile_given) {    if((freqfile = fopen(args_info.freqfile_arg, "r")) == NULL) {      perror(args_info.freqfile_arg);      exit(0);    }    else {            printf("Opened file %s for reading background frequencies\n",args_info.freqfile_arg);    }  }  if(args_info.replfile_given) {    if((replfile = fopen(args_info.replfile_arg, "r")) == NULL) {      perror(args_info.replfile_arg);      exit(0);    }    else {      printf("Opened file %s for reading replacement letters\n",args_info.replfile_arg);    }  }  if(args_info.alg_given) {    if(strcmp(args_info.alg_arg, "bw") == 0) {      training_method = BW_STD;    }    else if(strcmp(args_info.alg_arg, "cml") == 0) {      training_method = CML_STD;    }    else {      printf("Incorrect training method option: %s\n", args_info.alg_arg);      exit(0);    }  }    /* msa scoring options */  if(args_info.msascoring_given) {    if(strcmp(args_info.msascoring_arg, "DP") == 0) {      scoring_method = DOT_PRODUCT;    }    else if(strcmp(args_info.msascoring_arg, "DPPI") == 0) {      scoring_method = DOT_PRODUCT_PICASSO;    }    else if(strcmp(args_info.msascoring_arg, "PI") == 0) {      scoring_method = PICASSO;    }    else if(strcmp(args_info.msascoring_arg, "PIS") == 0) {      scoring_method = PICASSO_SYM;    }    else if(strcmp(args_info.msascoring_arg, "GM") == 0) {      scoring_method = SJOLANDER;    }    else if(strcmp(args_info.msascoring_arg, "GMR") == 0) {      scoring_method = SJOLANDER_REVERSED;    }    //else if(strcmp(args_info.msascoring_arg, "SMP") == 0) {    //  scoring_method = SUBST_MTX_PRODUCT;    //}    //else if(strcmp(args_info.msascoring_arg, "SMDP") == 0) {    //  scoring_method = SUBST_MTX_DOT_PRODUCT;    //}    //else if(strcmp(args_info.msascoring_arg, "SMDPP") == 0) {    //  scoring_method = SUBST_MTX_DOT_PRODUCT_PRIOR;    //}    else {      printf("Incorrect scoring method option: %s\n", args_info.msascoring_arg);      exit(0);    }  }  if(args_info.usecolumns_given) {    if(strcmp(args_info.usecolumns_arg, "all") == 0) {      use_lead_columns = NO;    }    else {      lead_seq = atoi(args_info.usecolumns_arg);      use_lead_columns = YES;      if(lead_seq <= 0) {	printf("Incorrect use-column option: %s\n", args_info.usecolumns_arg);	exit(0);      }    }  }  /* flags */  if(args_info.nolabels_given) {    /* checked after seqread */  }  if(args_info.noprior_given) {    /* checked after hmm read */  }  if(args_info.tpcounts_given) {    use_transition_pseudo_counts = YES;  }  if(args_info.epcounts_given) {    use_emission_pseudo_counts = YES;  }  if(args_info.verbose_given) {    verbose = YES;  }      /* read subst mtx */  if(substmtxfile != NULL) {      read_subst_matrix_multi(&subst_mtxp, &subst_mtxp_2, &subst_mtxp_3, &subst_mtxp_4, substmtxfile);  }    /* get frequency file */  if(freqfile != NULL) {    read_frequencies_multi(freqfile, &aa_freqs, &aa_freqs_2, &aa_freqs_3, &aa_freqs_4);  }    if((scoring_method == SUBST_MTX_PRODUCT || scoring_method == SUBST_MTX_DOT_PRODUCT ||      scoring_method == SUBST_MTX_DOT_PRODUCT_PRIOR)     && read_subst_mtx == NO) {    printf("Error: No substitution matrix supplied\n");    exit(0);  }     /* get hmm from file */  if(hmmfile != NULL) {    hmmfiletype = readhmm_check(hmmfile);    if(hmmfiletype == SINGLE_HMM) {      readhmm(hmmfile, &hmm);    }    else if(hmmfiletype == MULTI_HMM) {      readhmm_multialpha(hmmfile, &hmm);    }    hmm.subst_mtx = subst_mtxp;    hmm.subst_mtx_2 = subst_mtxp_2;    hmm.subst_mtx_3 = subst_mtxp_3;    hmm.subst_mtx_4 = subst_mtxp_4;  }  else {    /* cannot happen */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -