⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 modseqalign.c

📁 马尔科夫模型的java版本实现
💻 C
📖 第 1 页 / 共 3 页
字号:
/* align two sequences via the most likely state path of a given hmm */#include <stdio.h>#include <string.h>#include <stdlib.h>#include <math.h>#include <limits.h>#include "structs.h"#include "funcs.h"#include "cmdline_modseqalign.h"#define NORM_LOG_LIKE 0#define LOGODDS 1#define HMM 20#define SEQ 21#define LONGEST_SEQ -1 /* Note: this constant is also defined in read_seqs.c */#define FIRST_SEQ 1#define LEAD_SEQ 10#define ALL_SEQS 11#define MAX_LINE 500//#define DEBUG_ALIGNextern int verbose;static struct hmm_multi_s hmm;static struct msa_sequences_multi_s *msa_seq_infop_template, *msa_seq_infop_target;static struct sequences_multi_s seq_info_template, seq_info_target;static struct replacement_letter_multi_s replacement_letters;double *subst_mtxp;double *subst_mtxp_2;double *subst_mtxp_3;double *subst_mtxp_4;double *aa_freqs;double *aa_freqs_2;double *aa_freqs_3;double *aa_freqs_4;void align_paths(int **path_templatep, int **path_targetp, int seq_len_target, int seq_len_template, FILE *outfile,		 int seq_format);int main(int argc, char* argv[]){  /* command line options */  FILE *hmmfile; /* file to read hmms from */  FILE *outfile; /* output file */  FILE *target_seqfile; /* file to read sequences from */  FILE *template_seqfile; /* file for reading sequence names */  FILE *replfile; /* file for reading special replacement letters */  FILE *priorfile; /* file to read priordistribution from */  FILE *substmtxfile; /* file to read substitution matrix from */  FILE *freqfile;  int seq_format; /* input sequence format */  int align_alg, label_alg; /* algorithm for calculating match score */  int query; /* are hmms or seqs the queryside */  char seq_name[200];   char hmm_name[200];  char cur_savefile_name[200];  char *pure_seq_name_p, pure_seq_name_a[200]; /* sequences name without path name */  char *pure_hmm_name_p, pure_hmm_name_a[200]; /* sequences name without path name */  char *last_slash, *last_dot;  int prf_mode; /* method for doing the msa-hmm search */  int lead_seq; /* index of the sequence on which the search is performed */  int use_gap_shares; /* account for/don't account for gaps use when calculating the share of a symbol */  int use_prior; /* are we working with priors or not */  int use_labels;  int normalize;  int scoring_method;  int read_subst_mtx;  int multi_scoring_method;  int run_viterbi;  int hmmfiletype;  int use_lead_columns;  struct viterbi_s *viterbi_mtx;  double viterbi_score;  int *path_template, *path_incrementable_template;  int *path_target, *path_incrementable_target;  int seq_len_template, seq_len_target;  int b;  int k;    struct gengetopt_args_info args_info;  /* temporary variables */  int i,j; /*standard loop indices */   /*init some variables */  outfile = NULL;  hmmfile = NULL;  target_seqfile = NULL;  template_seqfile = NULL;  replfile = NULL;  priorfile = NULL;  substmtxfile = NULL;  freqfile = NULL;  seq_format = STANDARD;  align_alg = FORW_ALG;  label_alg = ONE_BEST_ALG;  query = SEQ;  lead_seq = -1;  prf_mode = ALL_SEQS;  use_gap_shares = NO;  use_prior = NO;  use_labels = NO;  normalize = NO;  scoring_method = SJOLANDER;  read_subst_mtx = NO;  multi_scoring_method = JOINT_PROB;  subst_mtxp = NULL;  subst_mtxp_2 = NULL;  subst_mtxp_3 = NULL;  subst_mtxp_4 = NULL;  aa_freqs = NULL;  aa_freqs_2 = NULL;  aa_freqs_3 = NULL;  aa_freqs_4 = NULL;  run_viterbi = NO;  use_lead_columns = NO;  /* parse command line */  if(cmdline_parser(argc, argv, &args_info) != 0) {    exit(1);  }  /* compulsory options */  if(args_info.hmmfile_given) {    if((hmmfile = fopen(args_info.hmmfile_arg, "r")) == NULL) {      perror(args_info.hmmfile_arg);      exit(0);    }    else {      printf("Opened file %s for reading hmm\n",args_info.hmmfile_arg);    }  }  if(args_info.target_given) {    if((target_seqfile = fopen(args_info.target_arg, "r")) == NULL) {      perror(args_info.target_arg);      exit(0);    }    else {      printf("Opened file %s for reading target sequence\n",args_info.target_arg);    }  }  if(args_info.template_given) {    if((template_seqfile = fopen(args_info.template_arg, "r")) == NULL) {      perror(args_info.template_arg);      exit(0);    }    else {      printf("Opened file %s for reading template sequence\n",args_info.template_arg);    }  }  if(args_info.outfile_given) {    if((outfile = fopen(args_info.outfile_arg, "w")) == NULL) {      perror(args_info.outfile_arg);      exit(0);    }    else {      printf("Opened file %s for writing\n",args_info.outfile_arg);    }  }    if(args_info.seqformat_given) {    if(strcmp(args_info.seqformat_arg, "fa") == 0) {      seq_format = FASTA;     }    else if(strcmp(args_info.seqformat_arg, "s") == 0) {      seq_format = STANDARD;    }    else if(strcmp(args_info.seqformat_arg, "msa") == 0) {      seq_format = MSA_STANDARD;    }    else if(strcmp(args_info.seqformat_arg, "prf") == 0) {      seq_format = PROFILE;    }    else {      printf("Incorrect sequence format: %s\n", args_info.seqformat_arg);      exit(0);    }  }  /* non compulsory options */  if(args_info.smxfile_given) {    if((substmtxfile = fopen(args_info.smxfile_arg, "r")) == NULL) {      perror(args_info.smxfile_arg);      exit(0);    }    else {      read_subst_mtx = YES;      printf("Opened file %s for reading substitution matrix\n",args_info.smxfile_arg);    }  }  if(args_info.freqfile_given) {    if((freqfile = fopen(args_info.freqfile_arg, "r")) == NULL) {      perror(args_info.freqfile_arg);      exit(0);    }    else {            printf("Opened file %s for reading background frequencies\n",args_info.freqfile_arg);    }  }  if(args_info.replfile_given) {    if((replfile = fopen(args_info.replfile_arg, "r")) == NULL) {      perror(args_info.replfile_arg);      exit(0);    }    else {      printf("Opened file %s for reading replacement letters\n",args_info.replfile_arg);    }  }  if(args_info.priorfile_given) {    if((priorfile = fopen(args_info.priorfile_arg, "r")) == NULL) {      perror(args_info.priorfile_arg);      exit(0);    }    else {      printf("Opened file %s for reading priors\n",args_info.priorfile_arg);    }  }      /* msa scoring options */  if(args_info.msascoring_given) {    if(strcmp(args_info.msascoring_arg, "DP") == 0) {      scoring_method = DOT_PRODUCT;    }    else if(strcmp(args_info.msascoring_arg, "DPPI") == 0) {      scoring_method = DOT_PRODUCT_PICASSO;    }    else if(strcmp(args_info.msascoring_arg, "PI") == 0) {      scoring_method = PICASSO;    }    else if(strcmp(args_info.msascoring_arg, "PIS") == 0) {      scoring_method = PICASSO_SYM;    }    else if(strcmp(args_info.msascoring_arg, "GM") == 0) {      scoring_method = SJOLANDER;    }    else if(strcmp(args_info.msascoring_arg, "GMR") == 0) {      scoring_method = SJOLANDER_REVERSED;    }    //else if(strcmp(args_info.msascoring_arg, "SMP") == 0) {    //  scoring_method = SUBST_MTX_PRODUCT;    //}    //else if(strcmp(args_info.msascoring_arg, "SMDP") == 0) {    //  scoring_method = SUBST_MTX_DOT_PRODUCT;    //}    //else if(strcmp(args_info.msascoring_arg, "SMDPP") == 0) {    //  scoring_method = SUBST_MTX_DOT_PRODUCT_PRIOR;    //}    else {      printf("Incorrect scoring method option: %s\n", args_info.msascoring_arg);      exit(0);    }  }  if(args_info.usecolumns_given) {    if(strcmp(args_info.usecolumns_arg, "all") == 0) {      prf_mode = ALL_SEQS;      use_lead_columns = NO;    }    else {      lead_seq = atoi(args_info.usecolumns_arg);      prf_mode = LEAD_SEQ;      use_lead_columns = YES;      if(lead_seq <= 0) {	printf("Incorrect use-column option: %s\n", args_info.usecolumns_arg);	exit(0);      }    }  }  /* flags */  if(args_info.nolabels_given) {    /* checked after seqread */  }  if(args_info.verbose_given) {    verbose = YES;  }   /* read subst mtx */  if(substmtxfile != NULL) {      read_subst_matrix_multi(&subst_mtxp, &subst_mtxp_2, &subst_mtxp_3, &subst_mtxp_4, substmtxfile);  }    /* get frequency file */  if(freqfile != NULL) {    read_frequencies_multi(freqfile, &aa_freqs, &aa_freqs_2, &aa_freqs_3, &aa_freqs_4);  }    /* get prior file */  /* get replacement letters */  if(replfile != NULL) {    get_replacement_letters_multi(replfile, &replacement_letters);  }  else {    replacement_letters.nr_alphabets = 0;    replacement_letters.nr_rl_1 = 0;    replacement_letters.nr_rl_2 = 0;    replacement_letters.nr_rl_3 = 0;    replacement_letters.nr_rl_4 = 0;    replacement_letters.letters_1 = NULL;    replacement_letters.probs_1 = NULL;    replacement_letters.letters_2 = NULL;    replacement_letters.probs_2 = NULL;    replacement_letters.letters_3 = NULL;    replacement_letters.probs_3 = NULL;    replacement_letters.letters_4 = NULL;    replacement_letters.probs_4 = NULL;  }  if((scoring_method == SUBST_MTX_PRODUCT || scoring_method == SUBST_MTX_DOT_PRODUCT ||      scoring_method == SUBST_MTX_DOT_PRODUCT_PRIOR)     && read_subst_mtx == NO) {    printf("Error: No substitution matrix supplied\n");    exit(0);  }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -