⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 initfa.c

📁 序列对齐 Compare a protein sequence to a protein sequence database or a DNA sequence to a DNA sequenc
💻 C
📖 第 1 页 / 共 4 页
字号:
    ppst->pam2[0][0][i] = ppst->pam2[0][i][0] = -BIGNUM;    for (j = 1; j <= i; j++) {      ppst->pam2[0][j][i] = ppst->pam2[0][i][j] = pam[k++] - ppst->pamoff;      if (ppst->pam_l > ppst->pam2[0][i][j]) ppst->pam_l =ppst->pam2[0][i][j];      if (ppst->pam_h < ppst->pam2[0][i][j]) ppst->pam_h =ppst->pam2[0][i][j];    }  }  /* add values for 'J' (I/L) value, which are not present in 1-D matrices */  ix_i = pascii['I'];  ix_l = pascii['L'];  ix_j = pascii['J'];    if (ix_j > ppst->nsq) return;  for (i=1; i <= nsq; i++) {    /* do not assume symmetric matrices */    ppst->pam2[0][ix_j][i] =      max(ppst->pam2[0][ix_i][i],ppst->pam2[0][ix_l][i]);    ppst->pam2[0][i][ix_j] =      max(ppst->pam2[0][i][ix_i],ppst->pam2[0][i][ix_l]);  }}voidinit_pamx (struct pstruct *ppst) {  int     i, j, k, nsq, pam_xx, pam_xm;  int sa_x, sa_t, tmp;  nsq = ppst->nsq;  ppst->nt_align = (ppst->dnaseq== SEQT_DNA || ppst->dnaseq == SEQT_RNA);  if (ppst->nt_align) {    sa_x = pascii['N'];    sa_t = sa_x;  }  else {    sa_x = pascii['X'];    sa_t = pascii['*'];  }  /* build an asymmetric matrix for RNA */  if (ppst->dnaseq == SEQT_RNA && !ppst->pam_set) {    tmp = ppst->pam2[0][nascii['G']][nascii['G']] - 3;    ppst->pam2[0][nascii['A']][nascii['G']] =       ppst->pam2[0][nascii['C']][nascii['T']] =       ppst->pam2[0][nascii['C']][nascii['U']] = tmp;  }  if (ppst->pam_x_set) {    for (i=1; i<=nsq; i++) {      if (sa_x <= nsq) ppst->pam2[0][sa_x][i] = ppst->pam2[0][i][sa_x]=ppst->pam_xm;      if (sa_t <= nsq) ppst->pam2[0][sa_t][i] = ppst->pam2[0][i][sa_t]=ppst->pam_xm;    }    if (sa_x <= nsq) ppst->pam2[0][sa_x][sa_x]=ppst->pam_xx;    if (sa_t <= nsq) ppst->pam2[0][sa_t][sa_t]=ppst->pam_xx;  }  else {    ppst->pam_xx = ppst->pam2[0][sa_x][sa_x];    ppst->pam_xm = ppst->pam2[0][1][sa_x];  }  pam_xx = ppst->pam_xx;  pam_xm = ppst->pam_xm;  if (ppst->ext_sq_set) {	/* using extended alphabet */    /* fill in pam2[1] matrix */    ppst->pam2[1][0][0] = -BIGNUM;    /* fill in additional parts of the matrix */    for (i = 1; i <= nsq; i++) {      /* -BIGNUM to all matches vs 0 */      ppst->pam2[0][0][i+nsq] = ppst->pam2[0][i+nsq][0] = 	ppst->pam2[1][0][i+nsq] = ppst->pam2[1][i+nsq][0] = 	ppst->pam2[1][0][i] = ppst->pam2[1][i][0] = -BIGNUM;      for (j = 1; j <= nsq; j++) {	/* replicate pam2[0] to i+nsq, j+nsq, also initialize lowest of pam2[1]*/	ppst->pam2[0][i+nsq][j] = ppst->pam2[0][i][j+nsq] =	  ppst->pam2[0][i+nsq][j+nsq] = ppst->pam2[1][i][j] =	  ppst->pam2[0][i][j];	/* set the high portion of pam2[1] to the corresponding value	   of pam2[1][sa_x][j] */	ppst->pam2[1][i+nsq][j] = ppst->pam2[1][i][j+nsq]=	  ppst->pam2[1][i+nsq][j+nsq]=ppst->pam2[0][sa_x][j];      }    }  }}/*  function specific initializations */voidf_initenv (struct mngmsg *m_msp, struct pstruct *ppst, unsigned char **aa0) {  struct msg_def_str m_msg_def;  int pgm_id;  pgm_id = ppst->pgm_id;  m_msg_def = msg_def_arr[pgm_id];  m_msp->last_calc_flg=0;  strncpy(m_msp->f_id0,m_msg_def.f_id0,sizeof(m_msp->f_id0));  strncpy(m_msp->f_id1,m_msg_def.f_id1,sizeof(m_msp->f_id1));  strncpy (m_msp->label, m_msg_def.label, sizeof(m_msp->label));  strncpy(m_msp->alabel, m_msg_def.alabel, sizeof(m_msp->alabel));#if !defined(SSEARCH) && !defined(GGSEARCH) && !defined(GLSEARCH) && !defined(LALIGN)  strncpy (m_msp->alab[0],"initn",20);  strncpy (m_msp->alab[1],"init1",20);  strncpy (m_msp->alab[2],"opt",20);#else#if defined(SSEARCH) || defined(LALIGN)  strncpy (m_msp->alab[0],"s-w opt",20);#else  strncpy (m_msp->alab[0],"n-w opt",20);#endif#endif#if defined(GGSEARCH) || defined(GLSEARCH)  m_msp->zsflag = ppst->zsflag = ppst->zsflag_f = 0;#else  m_msp->zsflag = ppst->zsflag = ppst->zsflag_f = 1;#endif  ppst->gdelval += pgm_def_arr[pgm_id].g_open_mod;  ppst->ggapval += pgm_def_arr[pgm_id].g_ext_mod;  ppst->sw_flag = m_msg_def.sw_flag;  ppst->e_cut = m_msp->e_cut=pgm_def_arr[pgm_id].e_cut;#ifndef LALIGN  ppst->e_cut_r = 0.01;		/* only significant */#else  ppst->e_cut_r = ppst->e_cut;	/* everything if local */#endif  ppst->score_ix = 0;  ppst->histint = 2;  m_msp->qframe = m_msg_def.qframe;  m_msp->nframe = m_msg_def.nframe;  m_msp->nrelv = m_msg_def.nrelv;  m_msp->srelv = m_msg_def.srelv;  m_msp->arelv = m_msg_def.arelv;  m_msp->stages = m_msg_def.stages;  m_msp->shuff_wid = 0;  m_msp->shuff_max = MAX_RSTATS;  /* see param.h for the definition of all these */  m_msp->qshuffle = 0;  m_msp->nm0 = 1;  m_msp->escore_flg = 0;  /* pam information */  ppst->pam_pssm = 0;#if defined(FASTS) || defined(FASTF) || defined(FASTM)   ppst->pam_xx = ppst->pam_xm = 0;#else  ppst->pam_xx = 1;  /* set >0 to use pam['X']['X'] value */  ppst->pam_xm = -1;  /* set >0 to use pam['X']['A-Z'] value */#endif  ppst->pam_x_set = 0;  ppst->pam_set = 0;  ppst->pam_pssm = 0;  ppst->p_d_set = 0;  ppst->pamoff = 0;  ppst->ext_sq_set = 0;  if (pgm_def_arr[ppst->pgm_id].ktup > 0) {    mktup = 2;    ppst->param_u.fa.bestscale = 300;    ppst->param_u.fa.bestoff = 36;    ppst->param_u.fa.bkfact = 6;    ppst->param_u.fa.scfact = 3;    ppst->param_u.fa.bktup = 2;    ppst->param_u.fa.ktup = 0;    ppst->param_u.fa.bestmax = 50;    ppst->param_u.fa.pamfact = 1;    ppst->param_u.fa.altflag = 0;    ppst->param_u.fa.optflag = 1;    ppst->param_u.fa.iniflag = 0;    ppst->param_u.fa.optcut = 0;    ppst->param_u.fa.optcut_set = 0;    ppst->param_u.fa.cgap = 0;    ppst->param_u.fa.optwid = MAXWINDOW;  }}/*  switches for fasta only */static int shift_set=0;static int subs_set=0;static int sw_flag_set=0;static int nframe_set=0;static int wid_set=0;voidf_getopt (char copt, char *optarg,	  struct mngmsg *m_msg, struct pstruct *ppst){  int pgm_id;  char *bp;  pgm_id = ppst->pgm_id;  switch (copt) {  case '1':     if (pgm_def_arr[pgm_id].ktup > 0) {      ppst->param_u.fa.iniflag=1;    }     break;  case '3':    nframe_set = 1;    if (pgm_id == TFA_PID) {      m_msg->nframe = 3; break;    }    else {      m_msg->nframe = 1;	/* for TFASTXY */      m_msg->qframe = 1;  /* for FASTA, FASTX */    }    break;  case 'A':    ppst->sw_flag= 1;    sw_flag_set = 1;    break;  case 'c':    if (pgm_def_arr[pgm_id].ktup > 0) {      sscanf (optarg, "%d", &ppst->param_u.fa.optcut);      ppst->param_u.fa.optcut_set = 1;    }    break;  case 'f':    sscanf (optarg, "%d", &ppst->gdelval);    if (ppst->gdelval > 0) ppst->gdelval = -ppst->gdelval;    del_set = 1;    break;  case 'g':    sscanf (optarg, "%d", &ppst->ggapval);    if (ppst->ggapval > 0) ppst->ggapval = -ppst->ggapval;    gap_set = 1;    break;  case 'h':    sscanf (optarg, "%d", &ppst->gshift);    if (ppst->gshift > 0) ppst->gshift = -ppst->gshift;    shift_set = 1;    break;  case 'j':    sscanf (optarg, "%d", &ppst->gsubs);    subs_set = 1;    break;  case 'k':    sscanf (optarg, "%d", &m_msg->shuff_max);    mshuff_set = 1;    break;  case 'M':    sscanf(optarg,"%d-%d",&m_msg->n1_low,&m_msg->n1_high);    if (m_msg->n1_low < 0) {      m_msg->n1_high = -m_msg->n1_low;      m_msg->n1_low = 0;    }    if (m_msg->n1_high == 0) m_msg->n1_high = BIGNUM;    if (m_msg->n1_low > m_msg->n1_high) {      fprintf(stderr," low cutoff %d greater than high %d\n",	      m_msg->n1_low, m_msg->n1_high);      m_msg->n1_low = 0;      m_msg->n1_high = BIGNUM;    }    ppst->n1_low = m_msg->n1_low;    ppst->n1_high = m_msg->n1_high;    break;  case 'n':    m_msg->qdnaseq = SEQT_DNA;    re_ascii(qascii,nascii);    strncpy(m_msg->sqnam,"nt",4);    prot2dna = 1;    break;  case 'o':    if (pgm_def_arr[pgm_id].ktup > 0) {      ppst->param_u.fa.optflag = 0;      msg_def_arr[pgm_id].nrelv = m_msg->nrelv = 2;    }    break;  case 'p':    m_msg->qdnaseq = SEQT_PROT;    ppst->dnaseq = SEQT_PROT;    strncpy(m_msg->sqnam,"aa",4);    break;  case 'P':    strncpy(ppst->pgpfile,optarg,MAX_FN);    if ((bp=strchr(ppst->pgpfile,' '))!=NULL) {      *bp='\0';      ppst->pgpfile_type = atoi(bp+1);    }    else ppst->pgpfile_type = 0;    ppst->pgpfile[MAX_FN-1]='\0';    ppst->pam_pssm = 1;    break;  case 'r':    sscanf(optarg,"%d/%d",&ppst->p_d_mat,&ppst->p_d_mis);    if (ppst->p_d_mat > 0 && ppst->p_d_mis < 0) {      ppst->p_d_set = 1;      strncpy(ppst->pamfile,optarg,40);    }    break;  case 's':    strncpy (ppst->pamfile, optarg, 120);    ppst->pamfile[120-1]='\0';    if (!standard_pam(ppst->pamfile,ppst,del_set, gap_set)) {      if (!initpam (ppst->pamfile, ppst)) {	strncpy(ppst->pamfile,pgm_def_arr[pgm_id].smstr,MAX_FN);      }    }    ppst->pam_set=1;    break;  case 'S':	/* turn on extended alphabet for seg */    ppst->ext_sq_set = 1;    break;  case 't':    if (tolower(optarg[0])=='t') {      m_msg->term_code = aascii['*']; optarg++;    }    if (*optarg) {sscanf (optarg, "%d", &ppst->tr_type);}    break;  case 'U':    m_msg->qdnaseq = SEQT_RNA;    memcpy(qascii,nascii,sizeof(qascii));    strncpy(m_msg->sqnam,"nt",4);    nt[nascii['T']]='U';    prot2dna=1;    break;  case 'x':    if (strchr(optarg,',')!=NULL) {      sscanf (optarg,"%d,%d",&ppst->pam_xx, &ppst->pam_xm);    }    else {      sscanf (optarg,"%d",&ppst->pam_xx);      ppst->pam_xm = ppst->pam_xx;    }    ppst->pam_x_set=1;    break;  case 'y':    if (pgm_def_arr[pgm_id].ktup > 0) {      sscanf (optarg, "%d", &ppst->param_u.fa.optwid);      wid_set = 1;    }    break;  case 'z':    sscanf(optarg,"%d",&ppst->zsflag);    break;  }}voidf_lastenv (struct mngmsg *m_msg, struct pstruct *ppst){  char save_str[MAX_SSTR];#if !defined(FASTM) && !defined(FASTS) && !defined(FASTF)  strncpy(save_str,"*",sizeof(save_str));#else  strncpy(save_str,",",sizeof(save_str));#endif  if (m_msg->qdnaseq == SEQT_UNK) {    build_xascii(qascii,save_str);    if (m_msg->q_ann_flg) ann_ascii(qascii,m_msg->ann_arr);  }  /* this check allows lc DNA sequence queries with FASTX */#if defined(FASTA) && !defined(FASTS) && !defined(FASTM) && !defined(FASTF)  else   init_ascii(ppst->ext_sq_set,qascii,m_msg->qdnaseq);#endif}voidf_getarg (int argc, char **argv, int optind,	  struct mngmsg *m_msg, struct pstruct *ppst){  if (pgm_def_arr[ppst->pgm_id].ktup > 0) {    if (argc - optind >= 4) {      sscanf (argv[optind + 3], "%d", &ppst->param_u.fa.ktup);      ktup_set = 1;    }    else      ppst->param_u.fa.ktup = -ppst->param_u.fa.bktup;  }    if (ppst->pgm_id == RSS_PID && argc - optind > 3) {    sscanf (argv[optind + 3], "%d", &m_msg->shuff_max);  }  if (ppst->pgm_id == RFX_PID && argc - optind > 4) {    sscanf (argv[optind + 4], "%d", &m_msg->shuff_max);  }}/* fills in the query ascii mapping from the parameter   ascii mapping.*/voidre_ascii(int *qascii, int *pascii) {  int i;  for (i=0; i < 128; i++) {    if (qascii[i] > '@' || qascii[i] < ESS) {      qascii[i] = pascii[i];    }  }}/* recode has become function specific to accommodate FASTS/M *//* modified 28-Dec-2004 to ensure that all mapped characters   are valid */intrecode(unsigned char *seq, int n, int *qascii, int nsqx) {  int i,j;  char save_c;#if defined(FASTS) || defined(FASTM)  qascii[',']=ESS;#endif  for (i=0; i < n; i++) {    save_c = seq[i];    if (seq[i] > '@') seq[i] = qascii[seq[i]];    if (seq[i] > nsqx && seq[i]!=ESS) {      fprintf(stderr, "*** Warning - unrecognized residue at %d:%c - %2d\n",	      i,save_c,save_c);      seq[i] = qascii['X'];    }  }  seq[i]=EOSEQ;  return i;}/* here we have the query sequence, all the command line options,   but we need to set various parameter options based on the type   of the query sequence (m_msg->qdnaseq = 0:protein/1:DNA) and   the function (FASTA/FASTX/TFASTA)*//* this resetp is for conventional a FASTA/TFASTXYZ search */voidresetp (struct mngmsg *m_msg, struct pstruct *ppst) {  int i, pgm_id;  pgm_id = ppst->pgm_id;#if defined(TFAST)  if (m_msg->qdnaseq == SEQT_DNA || m_msg->qdnaseq == SEQT_RNA) {    fprintf(stderr," %s compares a protein to a translated\n\DNA sequence library.  Do not use a DNA query/scoring matrix.\n",prog_func);    exit(1);  }#else#if (defined(FASTX) || defined(FASTY))  if (!(m_msg->qdnaseq == SEQT_DNA || m_msg->qdnaseq == SEQT_RNA)) {    fprintf(stderr," FASTX/Y compares a DNA sequence to a protein database\n");    fprintf(stderr," Use a DNA query\n");    exit(1);  }#endif#endif/* this code changes parameters for programs (FA_PID, SS_PID, FS_PID,   RSS_PID) that can examine either protein (initial state) or DNA    Modified May, 2006 to reset e_cut for DNA comparisons.*/  if (msg_def_arr[pgm_id].q_seqt == SEQT_UNK) {    if (m_msg->qdnaseq == SEQT_DNA || m_msg->qdnaseq == SEQT_RNA) {      msg_def_arr[pgm_id].q_seqt = m_msg->qdnaseq;      msg_def_arr[pgm_id].p_seqt = SEQT_DNA;      msg_def_arr[pgm_id].l_seqt = SEQT_DNA;      if (m_msg->qdnaseq == SEQT_DNA) msg_def_arr[pgm_id].qframe = 2;      pgm_def_arr[pgm_id].e_cut /= 5.0;      ppst->e_cut_r = 0.001;    }    else {      msg_def_arr[pgm_id].q_seqt = SEQT_PROT;    }  }  ppst->dnaseq = msg_def_arr[pgm_id].p_seqt;  if (!sw_flag_set) ppst->sw_flag = msg_def_arr[pgm_id].sw_flag;  if (!m_msg->e_cut_set) {    ppst->e_cut = m_msg->e_cut=pgm_def_arr[pgm_id].e_cut;  }  if (ppst->dnaseq == SEQT_DNA && m_msg->qdnaseq==SEQT_RNA) {    ppst->dnaseq = SEQT_RNA;    ppst->nt_align = 1;  }  if (ppst->dnaseq==SEQT_DNA) pascii = &nascii[0];  else if (ppst->dnaseq==SEQT_RNA) {    pascii = &nascii[0];    ppst->sq[nascii['T']] = 'U';  }  else pascii = &aascii[0];  m_msg->ldnaseq = msg_def_arr[pgm_id].l_seqt;  if (m_msg->ldnaseq & SEQT_DNA) {    memcpy(lascii,nascii,sizeof(lascii));#ifndef TFAST#ifdef DNALIB_LC   init_ascii(ppst->ext_sq_set,lascii,m_msg->ldnaseq);#endif#else  /* no init_ascii() because we translate lower case library sequences */#endif  }  else {    memcpy(lascii,aascii,sizeof(lascii));	/* initialize lib mapping */#if defined(FASTF) || defined(FASTS) || defined(FASTM)    lascii['*'] = NA;#endif    init_ascii(ppst->ext_sq_set,lascii,m_msg->ldnaseq);  }  if (!nframe_set) {    m_msg->qframe = msg_def_arr[pgm_id].qframe;    m_msg->nframe = msg_def_arr[pgm_id].nframe;  }  /* the possibilities:  	     -i  -3	qframe	revcomp   FA_D/FX   -    -        2       0	   FA_D/FX   +    -        2       1	   FA_D/FX   -    +        1       0	   FA_D/FX   +    +        2       1	  */  if (m_msg->qdnaseq == SEQT_DNA) {    m_msg->nframe = 1;    if (m_msg->qframe == 1 && m_msg->revcomp==1) {      m_msg->qframe = m_msg->revcomp+1;    }  }  else if (m_msg->qdnaseq == SEQT_RNA) {    m_msg->qframe = m_msg->revcomp+1;    m_msg->nframe = 1;  }  /* change settings for DNA search */  if (ppst->dnaseq == SEQT_DNA || ppst->dnaseq == SEQT_RNA) {    ppst->histint = 4;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -