⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hmmsearch.c

📁 马尔科夫模型的java版本实现
💻 C
📖 第 1 页 / 共 5 页
字号:
#include <stdio.h>#include <string.h>#include <stdlib.h>#include <math.h>#include <limits.h>#include "structs.h"#include "funcs.h"#include "cmdline_hmmsearch.h"#define NORM_LOG_LIKE 0#define LOGODDS 1#define HMM 20#define SEQ 21#define LONGEST_SEQ -1 /* Note: this constant is also defined in read_seqs.c */#define FIRST_SEQ 1#define LEAD_SEQ 10#define ALL_SEQS 11#define MAX_LINE 500extern int verbose;static struct hmm_multi_s hmm;static struct hmm_multi_s retrain_hmm;static struct msa_sequences_multi_s *msa_seq_infop;static struct sequences_multi_s seq_info;static struct replacement_letter_multi_s replacement_letters;static struct null_model_multi_s null_model;double *subst_mtxp;double *subst_mtxp_2;double *subst_mtxp_3;double *subst_mtxp_4;double *aa_freqs;double *aa_freqs_2;double *aa_freqs_3;double *aa_freqs_4;void get_null_model_multi(FILE *nullfile);double get_nullmodel_score_multi(struct letter_s *seq, struct letter_s *seq_2, struct letter_s *seq_3,				 struct letter_s *seq_4, int seq_len, int multi_scoring_method);double get_nullmodel_score_msa_multi(int seq_len, int prf_mode, int use_prior,				     int use_gap_shares, int normalize,				     int scoring_method, int multi_scoring_method, double *aa_freqs,				     double *aa_freqs_2, double *aa_freqs_3, double *aa_freqs_4);void get_scores(int run_max_d, int run_forward, int run_backward, int run_viterbi, int run_n_best,		int seq_format, int prf_mode, int use_gap_shares, int use_prior, int use_labels, int normalize,		int scoring_method, int multi_scoring_method, int print_labels, int print_post_probs,		int print_log_like, int print_log_odds, int print_reversi, int print_path, FILE *outfile);void get_post_prob_matrix(double **ret_post_prob_matrixp, double forw_score, struct forward_s *forw_mtx,			  struct backward_s *backw_mtx, int seq_len);int main(int argc, char* argv[]){  /* command line options */  FILE *outfile; /* file to prints results to */  FILE *hmmnamefile; /* file to read hmm names from */  FILE *hmmfile; /* file to read hmms from */  FILE *seqfile; /* file to read sequences from */  FILE *nullfile; /* file to read null model from */  FILE *seqnamefile; /* file for reading sequence names */  FILE *replfile; /* file for reading special replacement letters */  FILE *priorfile; /* file to read priordistribution from */  FILE *substmtxfile; /* file to read substitution matrix from */  FILE *freqfile;  int seq_format; /* input sequence format */  int align_alg, label_alg; /* algorithm for calculating match score */  int query; /* are hmms or seqs the queryside */  char seq_name[200];   char hmm_name[200];  char outfilepath_name[200];  char cur_savefile_name[200];  char *pure_seq_name_p, pure_seq_name_a[200]; /* sequences name without path name */  char *pure_hmm_name_p, pure_hmm_name_a[200]; /* sequences name without path name */  char *last_slash, *last_dot;  int prf_mode; /* method for doing the msa-hmm search */  int lead_seq; /* index of the sequence on which the search is performed */  int use_gap_shares; /* account for/don't account for gaps use when calculating the share of a symbol */  int use_prior; /* are we working with priors or not */  int use_labels;  int normalize;  int scoring_method;  int read_subst_mtx;  int multi_scoring_method;  int print_log_like;  int print_log_odds;  int print_reversi;  int print_labels;  int print_post_probs;  int print_path;  int run_forward;  int run_backward;  int run_viterbi;  int run_n_best;  int run_max_d;  int hmmfiletype;  int use_lead_columns;    struct gengetopt_args_info args_info;  /* temporary variables */  int i,j; /*standard loop indices */   /*init some variables */  outfile = NULL;  hmmnamefile = NULL;  hmmfile = NULL;  seqfile = NULL;  nullfile = NULL;  seqnamefile = NULL;  replfile = NULL;  priorfile = NULL;  substmtxfile = NULL;  freqfile = NULL;  seq_format = STANDARD;  align_alg = FORW_ALG;  label_alg = ONE_BEST_ALG;  query = SEQ;  outfilepath_name[0] = '\0';  lead_seq = -1;  prf_mode = ALL_SEQS;  use_gap_shares = YES;  use_prior = NO;  use_labels = NO;  normalize = NO;  scoring_method = DOT_PRODUCT;  read_subst_mtx = NO;  multi_scoring_method = JOINT_PROB;  subst_mtxp = NULL;  subst_mtxp_2 = NULL;  subst_mtxp_3 = NULL;  subst_mtxp_4 = NULL;  aa_freqs = NULL;  aa_freqs_2 = NULL;  aa_freqs_3 = NULL;  aa_freqs_4 = NULL;  print_log_like = NO;  print_log_odds = NO;  print_reversi = NO;  print_labels = NO;  print_post_probs = NO;  print_path = NO;  run_forward = NO;  run_backward = NO;  run_viterbi = NO;  run_n_best = NO;  run_max_d = NO;  /* parse command line */  if(cmdline_parser(argc, argv, &args_info) != 0) {    exit(1);  }  /* compulsory options */  if(args_info.hmmnamefile_given) {    if((hmmnamefile = fopen(args_info.hmmnamefile_arg, "r")) == NULL) {      perror(args_info.hmmnamefile_arg);      exit(0);    }    else {      printf("Opened file %s for reading hmm names\n",args_info.hmmnamefile_arg);    }  }  if(args_info.seqnamefile_given) {    if((seqnamefile = fopen(args_info.seqnamefile_arg, "r")) == NULL) {      perror(args_info.seqnamefile_arg);      exit(0);    }    else {      printf("Opened file %s for reading sequence names\n",args_info.seqnamefile_arg);    }  }  if(args_info.outpath_given) {    strcpy(outfilepath_name, args_info.outpath_arg);  }    if(args_info.seqformat_given) {    if(strcmp(args_info.seqformat_arg, "fa") == 0) {      seq_format = FASTA;     }    else if(strcmp(args_info.seqformat_arg, "s") == 0) {      seq_format = STANDARD;    }    else if(strcmp(args_info.seqformat_arg, "msa") == 0) {      seq_format = MSA_STANDARD;    }    else if(strcmp(args_info.seqformat_arg, "prf") == 0) {      seq_format = PROFILE;    }    else {      printf("Incorrect sequence format: %s\n", args_info.seqformat_arg);      exit(0);    }  }  /* non compulsory options */  if(args_info.smxfile_given) {    if((substmtxfile = fopen(args_info.smxfile_arg, "r")) == NULL) {      perror(args_info.smxfile_arg);      exit(0);    }    else {      read_subst_mtx = YES;      printf("Opened file %s for reading substitution matrix\n",args_info.smxfile_arg);    }  }  if(args_info.freqfile_given) {    if((freqfile = fopen(args_info.freqfile_arg, "r")) == NULL) {      perror(args_info.freqfile_arg);      exit(0);    }    else {            printf("Opened file %s for reading background frequencies\n",args_info.freqfile_arg);    }  }  if(args_info.replfile_given) {    if((replfile = fopen(args_info.replfile_arg, "r")) == NULL) {      perror(args_info.replfile_arg);      exit(0);    }    else {      printf("Opened file %s for reading replacement letters\n",args_info.replfile_arg);    }  }  if(args_info.priorfile_given) {    if((priorfile = fopen(args_info.priorfile_arg, "r")) == NULL) {      perror(args_info.priorfile_arg);      exit(0);    }    else {      printf("Opened file %s for reading priors\n",args_info.priorfile_arg);    }  }  if(args_info.nullfile_given) {    if((nullfile = fopen(args_info.nullfile_arg, "r")) == NULL) {      perror(args_info.nullfile_arg);      exit(0);    }    else {      printf("Opened file %s for reading null model\n",args_info.nullfile_arg);    }  }    if(args_info.anchor_given) {    if(strcmp(args_info.anchor_arg, "hmm") == 0) {      query = SEQ;    }    else if(strcmp(args_info.anchor_arg, "seq") == 0) {      query = HMM;    }    else {      printf("Error: Unrecognized anchor option\n");      exit(0);    }  }   if(args_info.labeloutput_given) {    print_labels = YES;    print_post_probs = YES;  }    if(args_info.alignmentoutput_given) {    print_reversi = YES;    print_log_odds = YES;    print_log_like = YES;  }    if(args_info.viterbi_given) {    label_alg = VITERBI_ALG;    align_alg = VITERBI_ALG;  }  if(args_info.forward_given) {    align_alg = FORW_ALG;  }  if(args_info.nbest_given) {    align_alg = ONE_BEST_ALG;  }  if(args_info.max_d_given) {    run_max_d = YES;  }    if(args_info.path_given) {    print_path = YES;  }  if(args_info.nopostout_given) {    print_post_probs = NO;  }  if(args_info.nolabelout_given) {    print_labels = NO;  }  if(args_info.nollout_given) {    print_log_like = NO;  }  if(args_info.nooddsout_given) {    print_log_odds = NO;  }  if(args_info.norevout_given) {    print_reversi = NO;  }  if(args_info.alignpostout_given) {    print_post_probs = YES;  }  if(args_info.alignlabelout_given) {    print_labels = YES;  }  if(args_info.labelllout_given) {    print_log_like = YES;  }  if(args_info.labeloddsout_given) {    print_log_odds = YES;  }  if(args_info.labelrevout_given) {    print_reversi = YES;  }   /* msa scoring options */  if(args_info.msascoring_given) {    if(strcmp(args_info.msascoring_arg, "DP") == 0) {      scoring_method = DOT_PRODUCT;    }    else if(strcmp(args_info.msascoring_arg, "DPPI") == 0) {      scoring_method = DOT_PRODUCT_PICASSO;    }    else if(strcmp(args_info.msascoring_arg, "PI") == 0) {      scoring_method = PICASSO;    }    else if(strcmp(args_info.msascoring_arg, "PIS") == 0) {      scoring_method = PICASSO_SYM;    }    else if(strcmp(args_info.msascoring_arg, "GM") == 0) {      scoring_method = SJOLANDER;    }    else if(strcmp(args_info.msascoring_arg, "GMR") == 0) {      scoring_method = SJOLANDER_REVERSED;    }    //else if(strcmp(args_info.msascoring_arg, "SMP") == 0) {    //  scoring_method = SUBST_MTX_PRODUCT;    //}    //else if(strcmp(args_info.msascoring_arg, "SMDP") == 0) {    //  scoring_method = SUBST_MTX_DOT_PRODUCT;    //}    //else if(strcmp(args_info.msascoring_arg, "SMDPP") == 0) {    //  scoring_method = SUBST_MTX_DOT_PRODUCT_PRIOR;    //}    else {      printf("Incorrect scoring method option: %s\n", args_info.msascoring_arg);      exit(0);    }  }  if(args_info.usecolumns_given) {    if(strcmp(args_info.usecolumns_arg, "all") == 0) {      prf_mode = ALL_SEQS;    }    else {      lead_seq = atoi(args_info.usecolumns_arg);      prf_mode = LEAD_SEQ;      if(lead_seq <= 0) {	printf("Incorrect use-column option: %s\n", args_info.usecolumns_arg);	exit(0);      }    }  }  /* flags */  if(args_info.nolabels_given) {    /* checked after seqread */  }  if(args_info.verbose_given) {    verbose = YES;  }    /* check which algorithms to run */  if(print_post_probs == YES) {    run_forward = YES;    run_backward = YES;  }  if(print_labels == YES) {    if(label_alg == VITERBI_ALG) {      run_viterbi = YES;    }    else if(label_alg == ONE_BEST_ALG) {      run_n_best = YES;    }  }  if(print_log_like == YES || print_log_odds == YES || print_reversi == YES) {    if(align_alg == VITERBI_ALG) {      run_viterbi = YES;    }    else if(align_alg == FORW_ALG) {      run_forward = YES;    }  }      /* read subst mtx */  if(substmtxfile != NULL) {      read_subst_matrix_multi(&subst_mtxp, &subst_mtxp_2, &subst_mtxp_3, &subst_mtxp_4, substmtxfile);  }    /* get frequency file */  if(freqfile != NULL) {    read_frequencies_multi(freqfile, &aa_freqs, &aa_freqs_2, &aa_freqs_3, &aa_freqs_4);  }    /* get prior file */  /* get replacement letters */  if(replfile != NULL) {    get_replacement_letters_multi(replfile, &replacement_letters);  }  else {    replacement_letters.nr_alphabets = 0;    replacement_letters.nr_rl_1 = 0;    replacement_letters.nr_rl_2 = 0;    replacement_letters.nr_rl_3 = 0;    replacement_letters.nr_rl_4 = 0;    replacement_letters.letters_1 = NULL;    replacement_letters.probs_1 = NULL;    replacement_letters.letters_2 = NULL;    replacement_letters.probs_2 = NULL;    replacement_letters.letters_3 = NULL;    replacement_letters.probs_3 = NULL;    replacement_letters.letters_4 = NULL;    replacement_letters.probs_4 = NULL;  }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -