📄 modseqalign.c
字号:
/* align two sequences via the most likely state path of a given hmm */#include <stdio.h>#include <string.h>#include <stdlib.h>#include <math.h>#include <limits.h>#include "structs.h"#include "funcs.h"#include "cmdline_modseqalign.h"#define NORM_LOG_LIKE 0#define LOGODDS 1#define HMM 20#define SEQ 21#define LONGEST_SEQ -1 /* Note: this constant is also defined in read_seqs.c */#define FIRST_SEQ 1#define LEAD_SEQ 10#define ALL_SEQS 11#define MAX_LINE 500//#define DEBUG_ALIGNextern int verbose;static struct hmm_multi_s hmm;static struct msa_sequences_multi_s *msa_seq_infop_template, *msa_seq_infop_target;static struct sequences_multi_s seq_info_template, seq_info_target;static struct replacement_letter_multi_s replacement_letters;double *subst_mtxp;double *subst_mtxp_2;double *subst_mtxp_3;double *subst_mtxp_4;double *aa_freqs;double *aa_freqs_2;double *aa_freqs_3;double *aa_freqs_4;void align_paths(int **path_templatep, int **path_targetp, int seq_len_target, int seq_len_template, FILE *outfile, int seq_format);int main(int argc, char* argv[]){ /* command line options */ FILE *hmmfile; /* file to read hmms from */ FILE *outfile; /* output file */ FILE *target_seqfile; /* file to read sequences from */ FILE *template_seqfile; /* file for reading sequence names */ FILE *replfile; /* file for reading special replacement letters */ FILE *priorfile; /* file to read priordistribution from */ FILE *substmtxfile; /* file to read substitution matrix from */ FILE *freqfile; int seq_format; /* input sequence format */ int align_alg, label_alg; /* algorithm for calculating match score */ int query; /* are hmms or seqs the queryside */ char seq_name[200]; char hmm_name[200]; char cur_savefile_name[200]; char *pure_seq_name_p, pure_seq_name_a[200]; /* sequences name without path name */ char *pure_hmm_name_p, pure_hmm_name_a[200]; /* sequences name without path name */ char *last_slash, *last_dot; int prf_mode; /* method for doing the msa-hmm search */ int lead_seq; /* index of the sequence on which the search is performed */ int use_gap_shares; /* account for/don't account for gaps use when calculating the share of a symbol */ int use_prior; /* are we working with priors or not */ int use_labels; int normalize; int scoring_method; int read_subst_mtx; int multi_scoring_method; int run_viterbi; int hmmfiletype; int use_lead_columns; struct viterbi_s *viterbi_mtx; double viterbi_score; int *path_template, *path_incrementable_template; int *path_target, *path_incrementable_target; int seq_len_template, seq_len_target; int b; int k; struct gengetopt_args_info args_info; /* temporary variables */ int i,j; /*standard loop indices */ /*init some variables */ outfile = NULL; hmmfile = NULL; target_seqfile = NULL; template_seqfile = NULL; replfile = NULL; priorfile = NULL; substmtxfile = NULL; freqfile = NULL; seq_format = STANDARD; align_alg = FORW_ALG; label_alg = ONE_BEST_ALG; query = SEQ; lead_seq = -1; prf_mode = ALL_SEQS; use_gap_shares = NO; use_prior = NO; use_labels = NO; normalize = NO; scoring_method = SJOLANDER; read_subst_mtx = NO; multi_scoring_method = JOINT_PROB; subst_mtxp = NULL; subst_mtxp_2 = NULL; subst_mtxp_3 = NULL; subst_mtxp_4 = NULL; aa_freqs = NULL; aa_freqs_2 = NULL; aa_freqs_3 = NULL; aa_freqs_4 = NULL; run_viterbi = NO; use_lead_columns = NO; /* parse command line */ if(cmdline_parser(argc, argv, &args_info) != 0) { exit(1); } /* compulsory options */ if(args_info.hmmfile_given) { if((hmmfile = fopen(args_info.hmmfile_arg, "r")) == NULL) { perror(args_info.hmmfile_arg); exit(0); } else { printf("Opened file %s for reading hmm\n",args_info.hmmfile_arg); } } if(args_info.target_given) { if((target_seqfile = fopen(args_info.target_arg, "r")) == NULL) { perror(args_info.target_arg); exit(0); } else { printf("Opened file %s for reading target sequence\n",args_info.target_arg); } } if(args_info.template_given) { if((template_seqfile = fopen(args_info.template_arg, "r")) == NULL) { perror(args_info.template_arg); exit(0); } else { printf("Opened file %s for reading template sequence\n",args_info.template_arg); } } if(args_info.outfile_given) { if((outfile = fopen(args_info.outfile_arg, "w")) == NULL) { perror(args_info.outfile_arg); exit(0); } else { printf("Opened file %s for writing\n",args_info.outfile_arg); } } if(args_info.seqformat_given) { if(strcmp(args_info.seqformat_arg, "fa") == 0) { seq_format = FASTA; } else if(strcmp(args_info.seqformat_arg, "s") == 0) { seq_format = STANDARD; } else if(strcmp(args_info.seqformat_arg, "msa") == 0) { seq_format = MSA_STANDARD; } else if(strcmp(args_info.seqformat_arg, "prf") == 0) { seq_format = PROFILE; } else { printf("Incorrect sequence format: %s\n", args_info.seqformat_arg); exit(0); } } /* non compulsory options */ if(args_info.smxfile_given) { if((substmtxfile = fopen(args_info.smxfile_arg, "r")) == NULL) { perror(args_info.smxfile_arg); exit(0); } else { read_subst_mtx = YES; printf("Opened file %s for reading substitution matrix\n",args_info.smxfile_arg); } } if(args_info.freqfile_given) { if((freqfile = fopen(args_info.freqfile_arg, "r")) == NULL) { perror(args_info.freqfile_arg); exit(0); } else { printf("Opened file %s for reading background frequencies\n",args_info.freqfile_arg); } } if(args_info.replfile_given) { if((replfile = fopen(args_info.replfile_arg, "r")) == NULL) { perror(args_info.replfile_arg); exit(0); } else { printf("Opened file %s for reading replacement letters\n",args_info.replfile_arg); } } if(args_info.priorfile_given) { if((priorfile = fopen(args_info.priorfile_arg, "r")) == NULL) { perror(args_info.priorfile_arg); exit(0); } else { printf("Opened file %s for reading priors\n",args_info.priorfile_arg); } } /* msa scoring options */ if(args_info.msascoring_given) { if(strcmp(args_info.msascoring_arg, "DP") == 0) { scoring_method = DOT_PRODUCT; } else if(strcmp(args_info.msascoring_arg, "DPPI") == 0) { scoring_method = DOT_PRODUCT_PICASSO; } else if(strcmp(args_info.msascoring_arg, "PI") == 0) { scoring_method = PICASSO; } else if(strcmp(args_info.msascoring_arg, "PIS") == 0) { scoring_method = PICASSO_SYM; } else if(strcmp(args_info.msascoring_arg, "GM") == 0) { scoring_method = SJOLANDER; } else if(strcmp(args_info.msascoring_arg, "GMR") == 0) { scoring_method = SJOLANDER_REVERSED; } //else if(strcmp(args_info.msascoring_arg, "SMP") == 0) { // scoring_method = SUBST_MTX_PRODUCT; //} //else if(strcmp(args_info.msascoring_arg, "SMDP") == 0) { // scoring_method = SUBST_MTX_DOT_PRODUCT; //} //else if(strcmp(args_info.msascoring_arg, "SMDPP") == 0) { // scoring_method = SUBST_MTX_DOT_PRODUCT_PRIOR; //} else { printf("Incorrect scoring method option: %s\n", args_info.msascoring_arg); exit(0); } } if(args_info.usecolumns_given) { if(strcmp(args_info.usecolumns_arg, "all") == 0) { prf_mode = ALL_SEQS; use_lead_columns = NO; } else { lead_seq = atoi(args_info.usecolumns_arg); prf_mode = LEAD_SEQ; use_lead_columns = YES; if(lead_seq <= 0) { printf("Incorrect use-column option: %s\n", args_info.usecolumns_arg); exit(0); } } } /* flags */ if(args_info.nolabels_given) { /* checked after seqread */ } if(args_info.verbose_given) { verbose = YES; } /* read subst mtx */ if(substmtxfile != NULL) { read_subst_matrix_multi(&subst_mtxp, &subst_mtxp_2, &subst_mtxp_3, &subst_mtxp_4, substmtxfile); } /* get frequency file */ if(freqfile != NULL) { read_frequencies_multi(freqfile, &aa_freqs, &aa_freqs_2, &aa_freqs_3, &aa_freqs_4); } /* get prior file */ /* get replacement letters */ if(replfile != NULL) { get_replacement_letters_multi(replfile, &replacement_letters); } else { replacement_letters.nr_alphabets = 0; replacement_letters.nr_rl_1 = 0; replacement_letters.nr_rl_2 = 0; replacement_letters.nr_rl_3 = 0; replacement_letters.nr_rl_4 = 0; replacement_letters.letters_1 = NULL; replacement_letters.probs_1 = NULL; replacement_letters.letters_2 = NULL; replacement_letters.probs_2 = NULL; replacement_letters.letters_3 = NULL; replacement_letters.probs_3 = NULL; replacement_letters.letters_4 = NULL; replacement_letters.probs_4 = NULL; } if((scoring_method == SUBST_MTX_PRODUCT || scoring_method == SUBST_MTX_DOT_PRODUCT || scoring_method == SUBST_MTX_DOT_PRODUCT_PRIOR) && read_subst_mtx == NO) { printf("Error: No substitution matrix supplied\n"); exit(0); }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -