⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 initfa.c

📁 序列对齐 Compare a protein sequence to a protein sequence database or a DNA sequence to a DNA sequenc
💻 C
📖 第 1 页 / 共 4 页
字号:
/*	initfa.c	*//* $Name: fa35_03_06 $ - $Id: initfa.c,v 1.172 2008/04/02 14:58:45 wrp Exp $ *//* copyright (c) 1996, 1997, 1998  William R. Pearson and the U. of Virginia *//* init??.c files provide function specific initializations *//* h_init()	- called from comp_lib.c, comp_thr.c to initialize pstruct ppst   		  which includes the alphabet, and pam matrix   alloc_pam()	- allocate pam matrix space   init_pam2()	- convert from 1D to 2D pam   init_pamx()	- convert from 1D to 2D pam   f_initenv()	- set up mngmsg and pstruct defaults   f_getopt()	- read fasta specific command line options   f_getarg()	- read ktup   resetp()	- reset the parameters, scoring matrix for DNA-DNA/DNA-prot   query_parm()	- ask for ktup   last_init()	- some things must be done last   f_initpam()	- set some parameters based on the pam matrix*/#include <stdio.h>#include <stdlib.h>#include <ctype.h>#include <string.h>#include <math.h>#ifdef UNIX#include <sys/types.h>#include <sys/stat.h>#endif#include "defs.h"#include "structs.h"#include "param.h"#ifndef PCOMPLIB#include "mw.h"#else#include "p_mw.h"#endif#define XTERNAL#include "upam.h"#include "uascii.h"#undef XTERNAL#define MAXWINDOW 32int initpam(char *, struct pstruct *);void init_pam2 (struct pstruct *ppst);void extend_pssm(unsigned char *aa0, int n0, struct pstruct *ppst);void build_xascii(int *qascii, char *save_str);void ann_ascii(int *qascii, char *ann_arr);void re_ascii(int *qascii, int *pascii);extern int nrand(int);/*  at some point, all the defaults should be driven from this table *//*#pgm	q_seq	l_seq	p_seq	matrix	g_open	g_ext	fr_shft	e_cut	ktup#	-n/-p		-s	-e	-f	-h/-j	-E	argv[3]fasta	prot(0)	prot(0)	prot(0)	bl50	-10	-2	-	10.0	2fasta	dna(1)	dna(1)	dna(1)	+5/-4	-12	-4	-	2.0	6ssearch	prot(0)	prot(0)	prot(0)	bl50	-10	-2	-	10.0	-ssearch	dna(1)	dna(1)	dna(1)	+5/-4	-16	-4	-	2.0	-fastx	dna(1)	prot(0)	prot(0)	BL50	-12	-2	-20	5.0	2fasty	dna(1)	prot(0)	prot(0)	BL50	-12	-2	-20/-24	5.0	2tfastx	dna(1)	prot(0)	prot(0)	BL50	-14	-2	-20	5.0	2tfasty	dna(1)	prot(0)	prot(0)	BL50	-14	-2	-20/-24	5.0	2fasts	prot(0)	prot(0)	prot(0)	MD20-MS	-	-	-	5.0	-fasts	dna(1)	dna(1)	dna(1)	+2/-4	-	-	-	5.0	1tfasts	prot(0)	dna(1)	prot(0)	MD10-MS	-	-	-	2.0	1fastf	prot(0)	prot(0)	prot(0)	MD20	-	-	-	2.0	1tfastf	prot(0)	dna(1)	prot(0)	MD10	-	-	-	1.0	1fastm	prot(0)	prot(0)	prot(0)	MD20	-	-	-	5.0	1fastm	dna(1)	dna(1)	dna(1)	+2/-4	-	-	-	2.0	1tfastm	prot(0)	dna(1)	prot(0)	MD10	-	-	-	2.0	1lalign	prot(0) prot(0) prot(0) BL50    -12     -2	-       1.0	-lalign	dna(1)  dna(1)  dna(1)  +5/-4   -12     -4	-	0.1	-*/struct pgm_def_str {  int pgm_id;  char *prog_func;  char *info_pgm_abbr;  char *iprompt0;  char *ref_str;  int PgmDID;  char *smstr;  int g_open_mod;  int g_ext_mod;  int gshift;  int hshift;  int e_cut;  int ktup;};char *ref_str_a[]={  "\nPlease cite:\n W.R. Pearson & D.J. Lipman PNAS (1988) 85:2444-2448\n",  "\nPlease cite:\n T. F. Smith and M. S. Waterman, (1981) J. Mol. Biol. 147:195-197; \n W.R. Pearson (1991) Genomics 11:635-650\n", "\nPlease cite:\n Pearson et al, Genomics (1997) 46:24-36\n", "\nPlease cite:\n Mackey et al. Mol. Cell. Proteomics  (2002) 1:139-147\n",  "\nPlease cite:\n W.R. Pearson (1996) Meth. Enzymol. 266:227-258\n",  "\nPlease cite:\n X. Huang and W. Miller (1991) Adv. Appl. Math. 12:373-381\n",  ""};#define FA_PID	1#define SS_PID	2#define FX_PID	3#define FY_PID	4#define FS_PID	5#define FF_PID	6#define FM_PID	7#define RSS_PID	8#define RFX_PID	9#define SSS_PID 10	/* old (slow) non-PG Smith-Waterman */#define TFA_PID	FA_PID+10#define TFX_PID	FX_PID+10#define TFY_PID	FY_PID+10#define TFS_PID	FS_PID+10#define TFF_PID	FF_PID+10#define TFM_PID FM_PID+10#define LAL_PID 18#define LNW_PID 19#define GNW_PID 20struct pgm_def_strpgm_def_arr[21] = {  {0, "", "", "", NULL, 400, "", 0, 0, 0, 0, 1.0, 0 },  /* 0 */  {FA_PID, "FASTA", "fa",   "FASTA searches a protein or DNA sequence data bank",   NULL, 401, "BL50", 0, 0, 0, 0, 10.0, 2}, /* 1 - FASTA */  {SS_PID, "SSEARCH","gsw","SSEARCH searches a sequence data bank",   NULL, 404, "BL50", 0, 0, 0, 0, 10.0, 0}, /* 2 - SSEARCH */  {FX_PID, "FASTX","fx",   "FASTX compares a DNA sequence to a protein sequence data bank",   NULL, 405, "BL50", -2, 0, -20, 0, 5.0, 2}, /* 3 - FASTX */  {FY_PID, "FASTY", "fy",   "FASTY compares a DNA sequence to a protein sequence data bank",   NULL, 405, "BL50", -2, 0, -20, -24, 5.0, 2}, /* 4 - FASTY */  {FS_PID, "FASTS", "fs",   "FASTS compares linked peptides to a protein data bank",   NULL, 400, "MD20-MS", 0, 0, 0, 0, 5.0, 1}, /* 5 - FASTS */  {FF_PID, "FASTF", "ff",   "FASTF compares mixed peptides to a protein databank",   NULL, 400, "MD20", 0, 0, 0, 0, 2.0, 1 }, /* 6 - FASTF */  {FM_PID, "FASTM", "fm",   "FASTM compares ordered peptides to a protein data bank",     NULL, 400, "MD20", 0, 0, 0, 0, 5.0, 1 }, /* 7 - FASTM */  {RSS_PID, "PRSS", "rss",   "PRSS evaluates statistical signficance using Smith-Waterman",   NULL, 401, "BL50", 0, 0, 0, 0, 1000.0, 0 }, /* 8 - PRSS */  {RFX_PID,"PRFX", "rfx",   "PRFX evaluates statistical signficance using FASTX",   NULL, 401, "BL50", -2, 0, -20, -24, 1000.0, 2 }, /* 9 - PRFX */  {SSS_PID, "OSEARCH","ssw","OSEARCH searches a sequence data bank",   NULL, 404, "BL50", 0, 0, 0, 0, 10.0, 0}, /* 2 - OSEARCH */  {TFA_PID, "TFASTA", "tfa",   "TFASTA compares a protein  to a translated DNA data bank",   NULL, 402, "BL50", -2, 0, 0, 5.0, 2 },  {0, "", "", "", NULL, 400, "", 0, 0, 0, 0, 1.0, 0 },  /* 0 */  {TFX_PID, "TFASTX", "tfx",   "TFASTX compares a protein to a translated DNA data bank",   NULL, 406, "BL50", -2, 0, -20, 0, 2.0, 2},  {TFY_PID, "TFASTY", "tfy",   "TFASTY compares a protein to a translated DNA data bank",   NULL, 406, "BL50", -2, 0, -20, -24, 2.0, 2},  {TFS_PID, "TFASTS", "tfs",   "TFASTS compares linked peptides to a translated DNA data bank",   NULL, 400, "MD10-MS", 0, 0, 0, 0, 2.0, 2 },  {TFF_PID, "TFASTF", "tff",   "TFASTF compares mixed peptides to a protein databank",   NULL, 400, "MD10", 0, 0, 0, 0, 1.0, 1 },  {TFM_PID, "TFASTM", "tfm",   "TFASTM compares ordered peptides to a translated DNA databank",   NULL, 400, "MD10", 0, 0, 0, 0, 1.0, 1 },  {LAL_PID, "LALIGN", "lal",   "LALIGN finds non-overlapping local alignments",   NULL, 404, "BL50", -2, 0, 0, 0, 1.0, 0}, 	/* 18 - LALIGN */  {LNW_PID, "GLSEARCH", "lnw",   "GLSEARCH produces global query alignments",   NULL, 404, "BL50", -2, 0, 0, 0, 10.0, 0}, 	/* 19 - GLSEARCH */  {GNW_PID, "GGSEARCH", "gnw",   "GGSEARCH produces global alignments",   NULL, 404, "BL50", 0, 0, 0, 0, 10.0, 0}, 	/* 20 - GGSEARCH */};struct msg_def_str {  int pgm_id;  int q_seqt;  int l_seqt;  int p_seqt;  int sw_flag;  int stages;  int qframe;  int nframe;  int nrelv, srelv, arelv;  char *f_id0, *f_id1, *label, *alabel;};char *align_label[]={  "Smith-Waterman",		/* 0 */  "banded Smith-Waterman",	/* 1 */  "Waterman-Eggert",		/* 2 */  "trans. Smith-Waterman",	/* 3 */  "global/local",		/* 4 */  "trans. global/local",	/* 5 */  "global/global (N-W)"	/* 6 */};/* pgm_id    q_seqt     l_seqt   p_seqt sw_f st qf nf nrv srv arv s_ix */struct msg_def_str msg_def_arr[21] = {  {0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0, "", "", ""},	/* ID=0 */  {FA_PID, SEQT_UNK, SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 3, 1, 3,   "fa","sw", "opt"},  {SS_PID, SEQT_UNK, SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 1, 1, 1,   "sw","sw", "s-w"},  {FX_PID, SEQT_DNA, SEQT_PROT, SEQT_PROT, 1, 1, 2, -1, 3, 1, 3,   "fx","sx", "opt"},  {FY_PID, SEQT_DNA, SEQT_PROT, SEQT_PROT, 1, 1, 2, -1, 3, 1, 3,   "fy","sy", "opt"},  {FS_PID, SEQT_UNK, SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 3, 2, 3,   "fs","fs", "initn init1"},  {FF_PID, SEQT_PROT,SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 3, 2, 3,   "ff","ff", "initn init1"},  {FM_PID, SEQT_PROT,SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 3, 2, 3,   "fm","fm","initn init1"},  {RSS_PID, SEQT_UNK,SEQT_PROT, SEQT_PROT, 0, 1, 1, -1, 1, 1, 1,   "rss","sw","s-w"},  {RFX_PID, SEQT_DNA,SEQT_PROT, SEQT_PROT, 0, 1, 2, -1, 3, 1, 3,   "rfx","sx","opt"},  {SSS_PID, SEQT_UNK,SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 1, 1, 1,   "sw","sw", "s-w"},  {TFA_PID, SEQT_PROT,SEQT_DNA, SEQT_PROT, 0, 1, 1, 6, 3, 1, 3,   "tfa","fa","initn init1"},  {0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0, "", "", ""},	/* ID=12 */  {TFX_PID, SEQT_PROT,SEQT_DNA, SEQT_PROT, 1, 1, 1, 2, 3, 2, 3,   "tfx","sx","initn opt"},  {TFY_PID, SEQT_PROT,SEQT_DNA, SEQT_PROT, 1, 1, 1, 2, 3, 2, 3,   "tfy","sy","initn opt"},  {TFS_PID, SEQT_PROT,SEQT_DNA, SEQT_PROT, 1, 1, 1, 6, 3, 2, 3,   "tfs","fs","initn init1"},  {TFF_PID, SEQT_PROT,SEQT_DNA, SEQT_PROT, 1, 1, 1, 6, 3, 2, 3,   "tff","ff","initn init1"},  {TFM_PID, SEQT_PROT,SEQT_DNA, SEQT_PROT, 1, 1, 1, 6, 3, 2, 3,   "tfm","fm","initn init1"},  {LAL_PID, SEQT_UNK, SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 1, 1, 1,   "lsw","lsw", "ls-w"},  {LNW_PID, SEQT_UNK, SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 1, 1, 1,   "gnw","gnw", "n-w"},  {GNW_PID, SEQT_UNK, SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 1, 1, 1,   "gnw","gnw", "n-w"},};intget_pgm_id() {  int rval=0;#ifdef FASTA#ifndef TFAST  pgm_def_arr[FA_PID].ref_str = ref_str_a[0];  msg_def_arr[FA_PID].alabel = align_label[0];  rval=FA_PID;#else  pgm_def_arr[TFA_PID].ref_str = ref_str_a[0];  msg_def_arr[TFA_PID].alabel = align_label[2];  rval=TFA_PID;#endif#endif#ifdef FASTX#ifndef TFAST#ifndef PRSS  pgm_def_arr[FX_PID].ref_str = ref_str_a[2];  msg_def_arr[FX_PID].alabel = align_label[3];  rval=FX_PID;#else  pgm_def_arr[RFX_PID].ref_str = ref_str_a[2];  msg_def_arr[FX_PID].alabel = align_label[3];  rval=RFX_PID;#endif#else  pgm_def_arr[TFX_PID].ref_str = ref_str_a[2];  msg_def_arr[TFX_PID].alabel = align_label[3];  rval=TFX_PID;#endif#endif#ifdef FASTY#ifndef TFAST  pgm_def_arr[FY_PID].ref_str = ref_str_a[2];  msg_def_arr[FY_PID].alabel = align_label[3];  rval=FY_PID;#else  pgm_def_arr[TFY_PID].ref_str = ref_str_a[2];  msg_def_arr[TFY_PID].alabel = align_label[3];  rval=TFY_PID;#endif#endif#ifdef FASTS#ifndef TFAST  pgm_def_arr[FS_PID].ref_str = ref_str_a[3];  msg_def_arr[FS_PID].alabel = align_label[4];  rval=FS_PID;#else  pgm_def_arr[TFS_PID].ref_str = ref_str_a[3];  msg_def_arr[TFS_PID].alabel = align_label[5];  rval=TFS_PID;#endif#endif#ifdef FASTF#ifndef TFAST  pgm_def_arr[FF_PID].ref_str = ref_str_a[3];  msg_def_arr[FF_PID].alabel = align_label[4];  rval=FF_PID;#else  pgm_def_arr[TFF_PID].ref_str = ref_str_a[3];  msg_def_arr[TFF_PID].alabel = align_label[5];  rval=TFF_PID;#endif#endif#ifdef FASTM#ifndef TFAST  pgm_def_arr[FM_PID].ref_str = ref_str_a[3];  msg_def_arr[FM_PID].alabel = align_label[4];  rval=FM_PID;#else  pgm_def_arr[TFM_PID].ref_str = ref_str_a[3];  msg_def_arr[TFM_PID].alabel = align_label[5];  rval=TFM_PID;#endif#endif#ifdef SSEARCH#define CAN_PSSM  pgm_def_arr[SS_PID].ref_str = ref_str_a[1];  msg_def_arr[SS_PID].alabel = align_label[0];  rval=SS_PID;#endif#ifdef OSEARCH  pgm_def_arr[SSS_PID].ref_str = ref_str_a[1];  msg_def_arr[SSS_PID].alabel = align_label[0];  rval=SSS_PID;#endif#ifdef LALIGN#define CAN_PSSM  pgm_def_arr[LAL_PID].ref_str = ref_str_a[5];  msg_def_arr[LAL_PID].alabel = align_label[2];  rval=LAL_PID;#endif#ifdef GLSEARCH#define CAN_PSSM  pgm_def_arr[LNW_PID].ref_str = ref_str_a[6];  msg_def_arr[LNW_PID].alabel = align_label[4];  rval=LNW_PID;#endif#ifdef GGSEARCH#define CAN_PSSM  pgm_def_arr[GNW_PID].ref_str = ref_str_a[6];  msg_def_arr[GNW_PID].alabel = align_label[6];  rval=GNW_PID;#endif  return rval;}char *iprompt1=" test sequence file name: ";char *iprompt2=" database file name: ";char *verstr="version 35.03 Feb. 18, 2008";char   *s_optstr = "13Ac:f:g:h:j:k:M:nopP:r:s:St:Ux:y:z:";static int mktup=2;static int ktup_set = 0;static int gap_set=0;static int del_set=0;static int mshuff_set = 0;static int prot2dna = 0;extern int max_workers;extern void s_abort(char *, char *);extern void init_ascii(int ext_sq, int *sascii, int dnaseq);extern int standard_pam(char *smstr, struct pstruct *ppst,			int del_set, int gap_set);extern void mk_n_pam(int *arr,int siz, int mat, int mis);extern int karlin(int , int, double *, double *, double *);extern void init_karlin_a(struct pstruct *, double *, double **);extern int do_karlin_a(int **, struct pstruct *, double *,		       double *, double *, double *, double *);#if defined(TFAST) || defined(FASTX) || defined(FASTY)extern void aainit(int tr_type, int debug);#endifchar *iprompt0, *prog_func, *refstr;/* Sets defaults assuming a protein sequence */void h_init (struct pstruct *ppst, struct mngmsg *m_msp, char *info_pgm_abbr){  struct pgm_def_str pgm_def;  int i, pgm_id;  ppst->pgm_id  = pgm_id =  get_pgm_id();  pgm_def = pgm_def_arr[pgm_id];  /* check that pgm_def_arr[] is valid */  if (pgm_def.pgm_id != pgm_id) {    fprintf(stderr,	    "**pgm_def integrity failure: def.pgm_id %d != pgm_id %d**\n",	    pgm_def.pgm_id, pgm_id);    exit(1);  }  /* check that msg_def_arr[] is valid */  if (msg_def_arr[pgm_id].pgm_id != pgm_id) {    fprintf(stderr,	    "**msg_def integrity failure: def.pgm_id %d != pgm_id %d**\n",	    msg_def_arr[pgm_id].pgm_id, pgm_id);    exit(1);  }  strncpy(info_pgm_abbr,pgm_def.info_pgm_abbr,MAX_SSTR);  iprompt0 = pgm_def.iprompt0;  refstr = pgm_def.ref_str;  prog_func = pgm_def.prog_func;  /* MAXTOT = MAXTST + MAXLIB for everything except TFAST,     where it is MAXTST + MAXTRN */  m_msp->max_tot = MAXTOT;  /* set up DNA query sequence if required*/  if (msg_def_arr[pgm_id].q_seqt == SEQT_DNA) {    memcpy(qascii,nascii,sizeof(qascii));    m_msp->qdnaseq = SEQT_DNA;  }  else { 	/* when SEQT_UNK, start with protein */    memcpy(qascii,aascii,sizeof(qascii));    m_msp->qdnaseq = msg_def_arr[pgm_id].q_seqt;  }#if defined(FASTF) || defined(FASTS) || defined(FASTM)  qascii[','] = ESS;  /* also initialize aascii, nascii for databases */  qascii['*'] = NA;#endif  /* initialize a pam matrix */  strncpy(ppst->pamfile,pgm_def.smstr,MAX_FN);  standard_pam(ppst->pamfile,ppst,del_set,gap_set);  ppst->have_pam2 = 0;  /* this is always protein by default */  ppst->nsq = naa;  ppst->nsqx = naax;  for (i=0; i<=ppst->nsqx; i++) {    ppst->sq[i] = aa[i];    ppst->hsq[i] = haa[i];    ppst->sqx[i]=aax[i];	/* sq = aa */    ppst->hsqx[i]=haax[i];	/* hsq = haa */  }  ppst->sq[ppst->nsqx+1] = ppst->sqx[ppst->nsqx+1] = '\0';  /* set up the c_nt[] mapping */#if defined(FASTS) || defined(FASTF) || defined(FASTM)  ppst->c_nt[ESS] = ESS;#endif  ppst->c_nt[0]=0;  for (i=1; i<=nnt; i++) {    ppst->c_nt[i]=gc_nt[i];    ppst->c_nt[i+nnt]=gc_nt[i]+nnt;  }#ifdef CAN_PSSM  ppst->pam2p[0] = NULL;  ppst->pam2p[1] = NULL;#endif}/* * alloc_pam(): allocates memory for the 2D pam matrix as well * as for the integer array used to transmit the pam matrix */voidalloc_pam (int d1, int d2, struct pstruct *ppst){  int     i, *d2p;  char err_str[128];  if ((ppst->pam2[0] = (int **) malloc (d1 * sizeof (int *))) == NULL) {     sprintf(err_str,"Cannot allocate 2D pam matrix: %d",d1);     s_abort (err_str,"");  }  if ((ppst->pam2[1] = (int **) malloc (d1 * sizeof (int *))) == NULL) {     sprintf(err_str,"Cannot allocate 2D pam matrix: %d",d1);     s_abort (err_str,"");  }  if ((d2p = pam12 = (int *) calloc (d1 * d2, sizeof (int))) == NULL) {     sprintf(err_str,"Cannot allocate 2D pam matrix: %d",d1);     s_abort (err_str,"");   }   for (i = 0; i < d1; i++, d2p += d2)      ppst->pam2[0][i] = d2p;   if ((d2p=pam12x= (int *) malloc (d1 * d2 * sizeof (int))) == NULL) {     sprintf(err_str,"Cannot allocate 2d pam matrix: %d",d2);     s_abort (err_str,"");   }   for (i = 0;  i < d1; i++, d2p += d2)      ppst->pam2[1][i] = d2p;   ppst->have_pam2 = 1;}/* *  init_pam2(struct pstruct pst): Converts 1-D pam matrix to 2-D */voidinit_pam2 (struct pstruct *ppst) {  int     i, j, k, nsq, sa_t;  int ix_j, ix_l, ix_i;  nsq = ppst->nsq;  sa_t = aascii['*'];	/* this is the last character for which pam[] is available */  ppst->pam2[0][0][0] = -BIGNUM;  ppst->pam_h = -1; ppst->pam_l = 1;  k = 0;  for (i = 1; i <= sa_t; i++) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -