📄 initfa.c
字号:
/* initfa.c *//* $Name: fa35_03_06 $ - $Id: initfa.c,v 1.172 2008/04/02 14:58:45 wrp Exp $ *//* copyright (c) 1996, 1997, 1998 William R. Pearson and the U. of Virginia *//* init??.c files provide function specific initializations *//* h_init() - called from comp_lib.c, comp_thr.c to initialize pstruct ppst which includes the alphabet, and pam matrix alloc_pam() - allocate pam matrix space init_pam2() - convert from 1D to 2D pam init_pamx() - convert from 1D to 2D pam f_initenv() - set up mngmsg and pstruct defaults f_getopt() - read fasta specific command line options f_getarg() - read ktup resetp() - reset the parameters, scoring matrix for DNA-DNA/DNA-prot query_parm() - ask for ktup last_init() - some things must be done last f_initpam() - set some parameters based on the pam matrix*/#include <stdio.h>#include <stdlib.h>#include <ctype.h>#include <string.h>#include <math.h>#ifdef UNIX#include <sys/types.h>#include <sys/stat.h>#endif#include "defs.h"#include "structs.h"#include "param.h"#ifndef PCOMPLIB#include "mw.h"#else#include "p_mw.h"#endif#define XTERNAL#include "upam.h"#include "uascii.h"#undef XTERNAL#define MAXWINDOW 32int initpam(char *, struct pstruct *);void init_pam2 (struct pstruct *ppst);void extend_pssm(unsigned char *aa0, int n0, struct pstruct *ppst);void build_xascii(int *qascii, char *save_str);void ann_ascii(int *qascii, char *ann_arr);void re_ascii(int *qascii, int *pascii);extern int nrand(int);/* at some point, all the defaults should be driven from this table *//*#pgm q_seq l_seq p_seq matrix g_open g_ext fr_shft e_cut ktup# -n/-p -s -e -f -h/-j -E argv[3]fasta prot(0) prot(0) prot(0) bl50 -10 -2 - 10.0 2fasta dna(1) dna(1) dna(1) +5/-4 -12 -4 - 2.0 6ssearch prot(0) prot(0) prot(0) bl50 -10 -2 - 10.0 -ssearch dna(1) dna(1) dna(1) +5/-4 -16 -4 - 2.0 -fastx dna(1) prot(0) prot(0) BL50 -12 -2 -20 5.0 2fasty dna(1) prot(0) prot(0) BL50 -12 -2 -20/-24 5.0 2tfastx dna(1) prot(0) prot(0) BL50 -14 -2 -20 5.0 2tfasty dna(1) prot(0) prot(0) BL50 -14 -2 -20/-24 5.0 2fasts prot(0) prot(0) prot(0) MD20-MS - - - 5.0 -fasts dna(1) dna(1) dna(1) +2/-4 - - - 5.0 1tfasts prot(0) dna(1) prot(0) MD10-MS - - - 2.0 1fastf prot(0) prot(0) prot(0) MD20 - - - 2.0 1tfastf prot(0) dna(1) prot(0) MD10 - - - 1.0 1fastm prot(0) prot(0) prot(0) MD20 - - - 5.0 1fastm dna(1) dna(1) dna(1) +2/-4 - - - 2.0 1tfastm prot(0) dna(1) prot(0) MD10 - - - 2.0 1lalign prot(0) prot(0) prot(0) BL50 -12 -2 - 1.0 -lalign dna(1) dna(1) dna(1) +5/-4 -12 -4 - 0.1 -*/struct pgm_def_str { int pgm_id; char *prog_func; char *info_pgm_abbr; char *iprompt0; char *ref_str; int PgmDID; char *smstr; int g_open_mod; int g_ext_mod; int gshift; int hshift; int e_cut; int ktup;};char *ref_str_a[]={ "\nPlease cite:\n W.R. Pearson & D.J. Lipman PNAS (1988) 85:2444-2448\n", "\nPlease cite:\n T. F. Smith and M. S. Waterman, (1981) J. Mol. Biol. 147:195-197; \n W.R. Pearson (1991) Genomics 11:635-650\n", "\nPlease cite:\n Pearson et al, Genomics (1997) 46:24-36\n", "\nPlease cite:\n Mackey et al. Mol. Cell. Proteomics (2002) 1:139-147\n", "\nPlease cite:\n W.R. Pearson (1996) Meth. Enzymol. 266:227-258\n", "\nPlease cite:\n X. Huang and W. Miller (1991) Adv. Appl. Math. 12:373-381\n", ""};#define FA_PID 1#define SS_PID 2#define FX_PID 3#define FY_PID 4#define FS_PID 5#define FF_PID 6#define FM_PID 7#define RSS_PID 8#define RFX_PID 9#define SSS_PID 10 /* old (slow) non-PG Smith-Waterman */#define TFA_PID FA_PID+10#define TFX_PID FX_PID+10#define TFY_PID FY_PID+10#define TFS_PID FS_PID+10#define TFF_PID FF_PID+10#define TFM_PID FM_PID+10#define LAL_PID 18#define LNW_PID 19#define GNW_PID 20struct pgm_def_strpgm_def_arr[21] = { {0, "", "", "", NULL, 400, "", 0, 0, 0, 0, 1.0, 0 }, /* 0 */ {FA_PID, "FASTA", "fa", "FASTA searches a protein or DNA sequence data bank", NULL, 401, "BL50", 0, 0, 0, 0, 10.0, 2}, /* 1 - FASTA */ {SS_PID, "SSEARCH","gsw","SSEARCH searches a sequence data bank", NULL, 404, "BL50", 0, 0, 0, 0, 10.0, 0}, /* 2 - SSEARCH */ {FX_PID, "FASTX","fx", "FASTX compares a DNA sequence to a protein sequence data bank", NULL, 405, "BL50", -2, 0, -20, 0, 5.0, 2}, /* 3 - FASTX */ {FY_PID, "FASTY", "fy", "FASTY compares a DNA sequence to a protein sequence data bank", NULL, 405, "BL50", -2, 0, -20, -24, 5.0, 2}, /* 4 - FASTY */ {FS_PID, "FASTS", "fs", "FASTS compares linked peptides to a protein data bank", NULL, 400, "MD20-MS", 0, 0, 0, 0, 5.0, 1}, /* 5 - FASTS */ {FF_PID, "FASTF", "ff", "FASTF compares mixed peptides to a protein databank", NULL, 400, "MD20", 0, 0, 0, 0, 2.0, 1 }, /* 6 - FASTF */ {FM_PID, "FASTM", "fm", "FASTM compares ordered peptides to a protein data bank", NULL, 400, "MD20", 0, 0, 0, 0, 5.0, 1 }, /* 7 - FASTM */ {RSS_PID, "PRSS", "rss", "PRSS evaluates statistical signficance using Smith-Waterman", NULL, 401, "BL50", 0, 0, 0, 0, 1000.0, 0 }, /* 8 - PRSS */ {RFX_PID,"PRFX", "rfx", "PRFX evaluates statistical signficance using FASTX", NULL, 401, "BL50", -2, 0, -20, -24, 1000.0, 2 }, /* 9 - PRFX */ {SSS_PID, "OSEARCH","ssw","OSEARCH searches a sequence data bank", NULL, 404, "BL50", 0, 0, 0, 0, 10.0, 0}, /* 2 - OSEARCH */ {TFA_PID, "TFASTA", "tfa", "TFASTA compares a protein to a translated DNA data bank", NULL, 402, "BL50", -2, 0, 0, 5.0, 2 }, {0, "", "", "", NULL, 400, "", 0, 0, 0, 0, 1.0, 0 }, /* 0 */ {TFX_PID, "TFASTX", "tfx", "TFASTX compares a protein to a translated DNA data bank", NULL, 406, "BL50", -2, 0, -20, 0, 2.0, 2}, {TFY_PID, "TFASTY", "tfy", "TFASTY compares a protein to a translated DNA data bank", NULL, 406, "BL50", -2, 0, -20, -24, 2.0, 2}, {TFS_PID, "TFASTS", "tfs", "TFASTS compares linked peptides to a translated DNA data bank", NULL, 400, "MD10-MS", 0, 0, 0, 0, 2.0, 2 }, {TFF_PID, "TFASTF", "tff", "TFASTF compares mixed peptides to a protein databank", NULL, 400, "MD10", 0, 0, 0, 0, 1.0, 1 }, {TFM_PID, "TFASTM", "tfm", "TFASTM compares ordered peptides to a translated DNA databank", NULL, 400, "MD10", 0, 0, 0, 0, 1.0, 1 }, {LAL_PID, "LALIGN", "lal", "LALIGN finds non-overlapping local alignments", NULL, 404, "BL50", -2, 0, 0, 0, 1.0, 0}, /* 18 - LALIGN */ {LNW_PID, "GLSEARCH", "lnw", "GLSEARCH produces global query alignments", NULL, 404, "BL50", -2, 0, 0, 0, 10.0, 0}, /* 19 - GLSEARCH */ {GNW_PID, "GGSEARCH", "gnw", "GGSEARCH produces global alignments", NULL, 404, "BL50", 0, 0, 0, 0, 10.0, 0}, /* 20 - GGSEARCH */};struct msg_def_str { int pgm_id; int q_seqt; int l_seqt; int p_seqt; int sw_flag; int stages; int qframe; int nframe; int nrelv, srelv, arelv; char *f_id0, *f_id1, *label, *alabel;};char *align_label[]={ "Smith-Waterman", /* 0 */ "banded Smith-Waterman", /* 1 */ "Waterman-Eggert", /* 2 */ "trans. Smith-Waterman", /* 3 */ "global/local", /* 4 */ "trans. global/local", /* 5 */ "global/global (N-W)" /* 6 */};/* pgm_id q_seqt l_seqt p_seqt sw_f st qf nf nrv srv arv s_ix */struct msg_def_str msg_def_arr[21] = { {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "", "", ""}, /* ID=0 */ {FA_PID, SEQT_UNK, SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 3, 1, 3, "fa","sw", "opt"}, {SS_PID, SEQT_UNK, SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 1, 1, 1, "sw","sw", "s-w"}, {FX_PID, SEQT_DNA, SEQT_PROT, SEQT_PROT, 1, 1, 2, -1, 3, 1, 3, "fx","sx", "opt"}, {FY_PID, SEQT_DNA, SEQT_PROT, SEQT_PROT, 1, 1, 2, -1, 3, 1, 3, "fy","sy", "opt"}, {FS_PID, SEQT_UNK, SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 3, 2, 3, "fs","fs", "initn init1"}, {FF_PID, SEQT_PROT,SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 3, 2, 3, "ff","ff", "initn init1"}, {FM_PID, SEQT_PROT,SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 3, 2, 3, "fm","fm","initn init1"}, {RSS_PID, SEQT_UNK,SEQT_PROT, SEQT_PROT, 0, 1, 1, -1, 1, 1, 1, "rss","sw","s-w"}, {RFX_PID, SEQT_DNA,SEQT_PROT, SEQT_PROT, 0, 1, 2, -1, 3, 1, 3, "rfx","sx","opt"}, {SSS_PID, SEQT_UNK,SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 1, 1, 1, "sw","sw", "s-w"}, {TFA_PID, SEQT_PROT,SEQT_DNA, SEQT_PROT, 0, 1, 1, 6, 3, 1, 3, "tfa","fa","initn init1"}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "", "", ""}, /* ID=12 */ {TFX_PID, SEQT_PROT,SEQT_DNA, SEQT_PROT, 1, 1, 1, 2, 3, 2, 3, "tfx","sx","initn opt"}, {TFY_PID, SEQT_PROT,SEQT_DNA, SEQT_PROT, 1, 1, 1, 2, 3, 2, 3, "tfy","sy","initn opt"}, {TFS_PID, SEQT_PROT,SEQT_DNA, SEQT_PROT, 1, 1, 1, 6, 3, 2, 3, "tfs","fs","initn init1"}, {TFF_PID, SEQT_PROT,SEQT_DNA, SEQT_PROT, 1, 1, 1, 6, 3, 2, 3, "tff","ff","initn init1"}, {TFM_PID, SEQT_PROT,SEQT_DNA, SEQT_PROT, 1, 1, 1, 6, 3, 2, 3, "tfm","fm","initn init1"}, {LAL_PID, SEQT_UNK, SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 1, 1, 1, "lsw","lsw", "ls-w"}, {LNW_PID, SEQT_UNK, SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 1, 1, 1, "gnw","gnw", "n-w"}, {GNW_PID, SEQT_UNK, SEQT_PROT, SEQT_PROT, 1, 1, 1, -1, 1, 1, 1, "gnw","gnw", "n-w"},};intget_pgm_id() { int rval=0;#ifdef FASTA#ifndef TFAST pgm_def_arr[FA_PID].ref_str = ref_str_a[0]; msg_def_arr[FA_PID].alabel = align_label[0]; rval=FA_PID;#else pgm_def_arr[TFA_PID].ref_str = ref_str_a[0]; msg_def_arr[TFA_PID].alabel = align_label[2]; rval=TFA_PID;#endif#endif#ifdef FASTX#ifndef TFAST#ifndef PRSS pgm_def_arr[FX_PID].ref_str = ref_str_a[2]; msg_def_arr[FX_PID].alabel = align_label[3]; rval=FX_PID;#else pgm_def_arr[RFX_PID].ref_str = ref_str_a[2]; msg_def_arr[FX_PID].alabel = align_label[3]; rval=RFX_PID;#endif#else pgm_def_arr[TFX_PID].ref_str = ref_str_a[2]; msg_def_arr[TFX_PID].alabel = align_label[3]; rval=TFX_PID;#endif#endif#ifdef FASTY#ifndef TFAST pgm_def_arr[FY_PID].ref_str = ref_str_a[2]; msg_def_arr[FY_PID].alabel = align_label[3]; rval=FY_PID;#else pgm_def_arr[TFY_PID].ref_str = ref_str_a[2]; msg_def_arr[TFY_PID].alabel = align_label[3]; rval=TFY_PID;#endif#endif#ifdef FASTS#ifndef TFAST pgm_def_arr[FS_PID].ref_str = ref_str_a[3]; msg_def_arr[FS_PID].alabel = align_label[4]; rval=FS_PID;#else pgm_def_arr[TFS_PID].ref_str = ref_str_a[3]; msg_def_arr[TFS_PID].alabel = align_label[5]; rval=TFS_PID;#endif#endif#ifdef FASTF#ifndef TFAST pgm_def_arr[FF_PID].ref_str = ref_str_a[3]; msg_def_arr[FF_PID].alabel = align_label[4]; rval=FF_PID;#else pgm_def_arr[TFF_PID].ref_str = ref_str_a[3]; msg_def_arr[TFF_PID].alabel = align_label[5]; rval=TFF_PID;#endif#endif#ifdef FASTM#ifndef TFAST pgm_def_arr[FM_PID].ref_str = ref_str_a[3]; msg_def_arr[FM_PID].alabel = align_label[4]; rval=FM_PID;#else pgm_def_arr[TFM_PID].ref_str = ref_str_a[3]; msg_def_arr[TFM_PID].alabel = align_label[5]; rval=TFM_PID;#endif#endif#ifdef SSEARCH#define CAN_PSSM pgm_def_arr[SS_PID].ref_str = ref_str_a[1]; msg_def_arr[SS_PID].alabel = align_label[0]; rval=SS_PID;#endif#ifdef OSEARCH pgm_def_arr[SSS_PID].ref_str = ref_str_a[1]; msg_def_arr[SSS_PID].alabel = align_label[0]; rval=SSS_PID;#endif#ifdef LALIGN#define CAN_PSSM pgm_def_arr[LAL_PID].ref_str = ref_str_a[5]; msg_def_arr[LAL_PID].alabel = align_label[2]; rval=LAL_PID;#endif#ifdef GLSEARCH#define CAN_PSSM pgm_def_arr[LNW_PID].ref_str = ref_str_a[6]; msg_def_arr[LNW_PID].alabel = align_label[4]; rval=LNW_PID;#endif#ifdef GGSEARCH#define CAN_PSSM pgm_def_arr[GNW_PID].ref_str = ref_str_a[6]; msg_def_arr[GNW_PID].alabel = align_label[6]; rval=GNW_PID;#endif return rval;}char *iprompt1=" test sequence file name: ";char *iprompt2=" database file name: ";char *verstr="version 35.03 Feb. 18, 2008";char *s_optstr = "13Ac:f:g:h:j:k:M:nopP:r:s:St:Ux:y:z:";static int mktup=2;static int ktup_set = 0;static int gap_set=0;static int del_set=0;static int mshuff_set = 0;static int prot2dna = 0;extern int max_workers;extern void s_abort(char *, char *);extern void init_ascii(int ext_sq, int *sascii, int dnaseq);extern int standard_pam(char *smstr, struct pstruct *ppst, int del_set, int gap_set);extern void mk_n_pam(int *arr,int siz, int mat, int mis);extern int karlin(int , int, double *, double *, double *);extern void init_karlin_a(struct pstruct *, double *, double **);extern int do_karlin_a(int **, struct pstruct *, double *, double *, double *, double *, double *);#if defined(TFAST) || defined(FASTX) || defined(FASTY)extern void aainit(int tr_type, int debug);#endifchar *iprompt0, *prog_func, *refstr;/* Sets defaults assuming a protein sequence */void h_init (struct pstruct *ppst, struct mngmsg *m_msp, char *info_pgm_abbr){ struct pgm_def_str pgm_def; int i, pgm_id; ppst->pgm_id = pgm_id = get_pgm_id(); pgm_def = pgm_def_arr[pgm_id]; /* check that pgm_def_arr[] is valid */ if (pgm_def.pgm_id != pgm_id) { fprintf(stderr, "**pgm_def integrity failure: def.pgm_id %d != pgm_id %d**\n", pgm_def.pgm_id, pgm_id); exit(1); } /* check that msg_def_arr[] is valid */ if (msg_def_arr[pgm_id].pgm_id != pgm_id) { fprintf(stderr, "**msg_def integrity failure: def.pgm_id %d != pgm_id %d**\n", msg_def_arr[pgm_id].pgm_id, pgm_id); exit(1); } strncpy(info_pgm_abbr,pgm_def.info_pgm_abbr,MAX_SSTR); iprompt0 = pgm_def.iprompt0; refstr = pgm_def.ref_str; prog_func = pgm_def.prog_func; /* MAXTOT = MAXTST + MAXLIB for everything except TFAST, where it is MAXTST + MAXTRN */ m_msp->max_tot = MAXTOT; /* set up DNA query sequence if required*/ if (msg_def_arr[pgm_id].q_seqt == SEQT_DNA) { memcpy(qascii,nascii,sizeof(qascii)); m_msp->qdnaseq = SEQT_DNA; } else { /* when SEQT_UNK, start with protein */ memcpy(qascii,aascii,sizeof(qascii)); m_msp->qdnaseq = msg_def_arr[pgm_id].q_seqt; }#if defined(FASTF) || defined(FASTS) || defined(FASTM) qascii[','] = ESS; /* also initialize aascii, nascii for databases */ qascii['*'] = NA;#endif /* initialize a pam matrix */ strncpy(ppst->pamfile,pgm_def.smstr,MAX_FN); standard_pam(ppst->pamfile,ppst,del_set,gap_set); ppst->have_pam2 = 0; /* this is always protein by default */ ppst->nsq = naa; ppst->nsqx = naax; for (i=0; i<=ppst->nsqx; i++) { ppst->sq[i] = aa[i]; ppst->hsq[i] = haa[i]; ppst->sqx[i]=aax[i]; /* sq = aa */ ppst->hsqx[i]=haax[i]; /* hsq = haa */ } ppst->sq[ppst->nsqx+1] = ppst->sqx[ppst->nsqx+1] = '\0'; /* set up the c_nt[] mapping */#if defined(FASTS) || defined(FASTF) || defined(FASTM) ppst->c_nt[ESS] = ESS;#endif ppst->c_nt[0]=0; for (i=1; i<=nnt; i++) { ppst->c_nt[i]=gc_nt[i]; ppst->c_nt[i+nnt]=gc_nt[i]+nnt; }#ifdef CAN_PSSM ppst->pam2p[0] = NULL; ppst->pam2p[1] = NULL;#endif}/* * alloc_pam(): allocates memory for the 2D pam matrix as well * as for the integer array used to transmit the pam matrix */voidalloc_pam (int d1, int d2, struct pstruct *ppst){ int i, *d2p; char err_str[128]; if ((ppst->pam2[0] = (int **) malloc (d1 * sizeof (int *))) == NULL) { sprintf(err_str,"Cannot allocate 2D pam matrix: %d",d1); s_abort (err_str,""); } if ((ppst->pam2[1] = (int **) malloc (d1 * sizeof (int *))) == NULL) { sprintf(err_str,"Cannot allocate 2D pam matrix: %d",d1); s_abort (err_str,""); } if ((d2p = pam12 = (int *) calloc (d1 * d2, sizeof (int))) == NULL) { sprintf(err_str,"Cannot allocate 2D pam matrix: %d",d1); s_abort (err_str,""); } for (i = 0; i < d1; i++, d2p += d2) ppst->pam2[0][i] = d2p; if ((d2p=pam12x= (int *) malloc (d1 * d2 * sizeof (int))) == NULL) { sprintf(err_str,"Cannot allocate 2d pam matrix: %d",d2); s_abort (err_str,""); } for (i = 0; i < d1; i++, d2p += d2) ppst->pam2[1][i] = d2p; ppst->have_pam2 = 1;}/* * init_pam2(struct pstruct pst): Converts 1-D pam matrix to 2-D */voidinit_pam2 (struct pstruct *ppst) { int i, j, k, nsq, sa_t; int ix_j, ix_l, ix_i; nsq = ppst->nsq; sa_t = aascii['*']; /* this is the last character for which pam[] is available */ ppst->pam2[0][0][0] = -BIGNUM; ppst->pam_h = -1; ppst->pam_l = 1; k = 0; for (i = 1; i <= sa_t; i++) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -