⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 initfa.c

📁 序列对齐 Compare a protein sequence to a protein sequence database or a DNA sequence to a DNA sequenc
💻 C
📖 第 1 页 / 共 4 页
字号:
    if (!del_set) {#ifdef OLD_FASTA_GAP      ppst->gdelval = -16;	/* def. del penalty */#else      ppst->gdelval = -12;	/* def. open penalty */#endif    }    if (!gap_set) ppst->ggapval = -4;	/* def. gap penalty */    if (pgm_def_arr[pgm_id].ktup > 0) {      /* these parameters are used to scale optcut, they should be replaced	 by statistically based parameters */      if (!wid_set) ppst->param_u.fa.optwid = 16;      ppst->param_u.fa.bestscale = 80;      ppst->param_u.fa.bkfact = 5;      ppst->param_u.fa.scfact = 1;      ppst->param_u.fa.bktup = 6;      ppst->param_u.fa.bestmax = 80;      ppst->param_u.fa.bestoff = 45;      if (!sw_flag_set) {	ppst->sw_flag = 0;	strncpy(m_msg->f_id1,"bs",sizeof(m_msg->f_id1));	strncpy(m_msg->alabel, align_label[1], sizeof(m_msg->alabel));      }      /* largest ktup */      mktup = 6;            if (ppst->param_u.fa.pamfact >= 0) ppst->param_u.fa.pamfact = 0;      if (ppst->param_u.fa.ktup < 0)	ppst->param_u.fa.ktup = -ppst->param_u.fa.bktup;    }    ppst->nsq = nnt;    ppst->nsqx = nntx;    for (i=0; i<=ppst->nsqx; i++) {      ppst->hsq[i] = hnt[i];      ppst->sq[i] = nt[i];      ppst->hsqx[i] = hntx[i];      ppst->sqx[i] = ntx[i];    }    ppst->sq[ppst->nsqx+1] = ppst->sqx[ppst->nsqx+1] = '\0';    if (!ppst->pam_set) {      if (ppst->p_d_set)	mk_n_pam(npam,nnt,ppst->p_d_mat,ppst->p_d_mis);#if !defined(FASTS) && !defined(FASTM)      else if (ppst->pamfile[0]=='\0' || strncmp(ppst->pamfile,"BL50",4)==0) {	strncpy (ppst->pamfile, "+5/-4", sizeof(ppst->pamfile));      }#else      else if (strncmp(ppst->pamfile,"MD20",4)==0) {	strncpy (ppst->pamfile, "+2/-2", sizeof(ppst->pamfile));	ppst->p_d_mat = +2;	ppst->p_d_mis = -2;      	mk_n_pam(npam,nnt,ppst->p_d_mat,ppst->p_d_mis);      }#endif      pam = npam;    }    strncpy (m_msg->sqnam, "nt",sizeof(m_msg->sqnam));    strncpy (m_msg->sqtype, "DNA",sizeof(m_msg->sqtype));  }	/* end DNA reset */  else {  /* other parameters for protein comparison */    if (pgm_def_arr[pgm_id].ktup > 0) {      if (!wid_set) {	if (ppst->param_u.fa.ktup==1) ppst->param_u.fa.optwid = 32;	else ppst->param_u.fa.optwid = 16;      }    }    if (!shift_set) {ppst->gshift = pgm_def_arr[pgm_id].gshift;}    if (!subs_set) {ppst->gsubs = pgm_def_arr[pgm_id].hshift;}  }}/* query_parm()	this function asks for any additional parameters	that have not been provided.  Could be null. */voidquery_parm (struct mngmsg *m_msp, struct pstruct *ppst){   char    qline[40];   if (pgm_def_arr[ppst->pgm_id].ktup > 0) {     if (ppst->param_u.fa.ktup < 0)       ppst->param_u.fa.ktup = -ppst->param_u.fa.ktup;     if (ppst->param_u.fa.ktup == 0) {       printf (" ktup? (1 to %d) [%d] ", mktup, ppst->param_u.fa.bktup);       if (fgets (qline, sizeof(qline), stdin) == NULL) exit (0);       else sscanf(qline,"%d",&ppst->param_u.fa.ktup);     }     if (ppst->param_u.fa.ktup == 0)       ppst->param_u.fa.ktup = ppst->param_u.fa.bktup;     else ktup_set = 1;   }#if defined(PRSS)   if (m_msp->shuff_max < 10) m_msp->shuff_max = MAX_RSTATS;   if (!mshuff_set) {     printf(" number of shuffles [%d]? ",m_msp->shuff_max);     fflush(stdout);     if (fgets (qline, sizeof(qline), stdin) == NULL) exit (0);     else sscanf(qline,"%d",&m_msp->shuff_max);   }   if (ppst->zs_win == 0) {     printf (" local (window) (w) or uniform (u) shuffle [u]? ");     if (fgets (qline, sizeof(qline), stdin) == NULL) exit (0);     else if (qline[0]=='w' || qline[0]=='W') {       m_msp->shuff_wid = 20;       printf(" local shuffle window size [%d]? ",m_msp->shuff_wid);       if (fgets (qline, sizeof(qline), stdin) == NULL) exit (0);       else sscanf(qline,"%d",&m_msp->shuff_wid);     }   }#endif}/* last_init() cannot look at aa0, n0, because it is only run once,   it is not run before each new aa0 search */voidlast_init (struct mngmsg *m_msg, struct pstruct *ppst#ifdef PCOMPLIB	   ,int nnodes#endif	   ){  int ix_l, ix_i, i, pgm_id;  double *kar_p;  double aa0_f[MAXSQ];  pgm_id = ppst->pgm_id;#ifdef LALIGN  m_msg->do_showbest = 0;  m_msg->quiet = 1;#endif#if defined(FASTF) || defined(FASTS) || defined(FASTM)  m_msg->nohist = 1;  m_msg->shuff_max = 2000;#ifndef PCOMPLIB  ppst->shuff_node = m_msg->shuff_max/max_workers;#else  ppst->shuff_node = m_msg->shuff_max/nnodes;#endif#endif  if (m_msg->aln.llen < 1) {    m_msg->aln.llen = 60;  }#ifndef PCOMPLIB#if defined(FASTX) || defined(FASTY) || defined(TFAST)  /* set up translation tables: faatran.c */  aainit(ppst->tr_type,ppst->debug_lib);#endif#endif/* a sanity check */#if !defined(TFAST)   if (m_msg->revcomp && m_msg->qdnaseq!=SEQT_DNA && m_msg->qdnaseq!=SEQT_RNA) {     fprintf(stderr," cannot reverse complement protein\n");     m_msg->revcomp = 0;   }#endif   if (pgm_def_arr[pgm_id].ktup > 0) {     if (ppst->param_u.fa.ktup < 0)       ppst->param_u.fa.ktup = -ppst->param_u.fa.ktup;     if (ppst->param_u.fa.ktup < 1 || ppst->param_u.fa.ktup > mktup) {       fprintf(stderr," warning ktup = %d out of range [1..%d], reset to %d\n",	       ppst->param_u.fa.ktup, mktup, ppst->param_u.fa.bktup);       ppst->param_u.fa.ktup = ppst->param_u.fa.bktup;     }   }   if (pgm_id == TFA_PID) {     m_msg->revcomp *= 3;     if (m_msg->nframe == 3) m_msg->nframe += m_msg->revcomp;   }   else if (pgm_id == TFX_PID || pgm_id == TFY_PID) {     if (m_msg->nframe == 1) m_msg->nframe += m_msg->revcomp;   }#if !defined(TFAST)  /* for fasta/fastx searches, itt iterates the the query strand */  m_msg->nitt1 = m_msg->qframe-1;#else  /* for tfasta/tfastxy searches, itt iterates the library frames */  m_msg->nitt1 = m_msg->nframe-1;#endif  if (pgm_def_arr[pgm_id].ktup > 0) {	/* its FASTA, not SSEARCH */    if (ppst->param_u.fa.ktup>=2 && !wid_set) {      ppst->param_u.fa.optwid=16;      switch (pgm_id) {      case FA_PID:	m_msg->thr_fact = 32;	break;      case FX_PID:      case FY_PID:	m_msg->thr_fact = 16;	break;      case TFA_PID:      case TFX_PID:      case TFY_PID:	m_msg->thr_fact = 8;	break;      default:	m_msg->thr_fact = 4;      }    }    else { m_msg->thr_fact = 4;}  }  else {#if !defined(SW_ALTIVEC) && !defined(SW_SSE2)    m_msg->thr_fact = 1;	/* unvectorized SSEARCH  */#else    m_msg->thr_fact = 16;	/* vectorized SSEARCH */#endif  }#if defined(PRSS)   if (m_msg->shuff_max < 10) m_msg->shuff_max = MAX_RSTATS;   if (ppst->zsflag < 10) ppst->zsflag += 10;   if (ppst->zs_win > 0) {     m_msg->shuff_wid = ppst->zs_win;   }#endif   if (pgm_def_arr[ppst->pgm_id].ktup > 0) {     if (ppst->param_u.fa.iniflag) {       ppst->score_ix = 1;       strncpy (m_msg->label, "initn init1", sizeof(m_msg->label));     }     else if (ppst->param_u.fa.optflag) {       ppst->score_ix = 2;       m_msg->stages = 1;     }   }   if (!ppst->have_pam2) {     alloc_pam (MAXSQ, MAXSQ, ppst);     init_pam2(ppst);   }   init_pamx(ppst);   if (ppst->pam_ms) {     if (m_msg->qdnaseq == SEQT_PROT) {       /* code to make 'L'/'I' identical scores */       ix_l = pascii['L'];       ix_i = pascii['I'];       ppst->pam2[0][ix_l][ix_i] = ppst->pam2[0][ix_i][ix_l] =	 ppst->pam2[0][ix_l][ix_l] = ppst->pam2[0][ix_i][ix_i] =	 max(ppst->pam2[0][ix_l][ix_l],ppst->pam2[0][ix_i][ix_i]);       for (i=1; i<=ppst->nsq; i++) {	 ppst->pam2[0][i][ix_i] = ppst->pam2[0][i][ix_l] =	   max(ppst->pam2[0][i][ix_l],ppst->pam2[0][i][ix_i]);	 ppst->pam2[0][ix_i][i] = ppst->pam2[0][ix_l][i] =	   max(ppst->pam2[0][ix_i][i],ppst->pam2[0][ix_l][i]);       }       /* code to make 'Q'/'K' identical scores */       if (!shift_set) {	 ix_l = pascii['Q'];	 ix_i = pascii['K'];	 ppst->pam2[0][ix_l][ix_i] = ppst->pam2[0][ix_i][ix_l] =	   ppst->pam2[0][ix_l][ix_l] = ppst->pam2[0][ix_i][ix_i] =	   (ppst->pam2[0][ix_l][ix_l]+ppst->pam2[0][ix_i][ix_i]+1)/2;	 for (i=1; i<=ppst->nsq; i++) {	   ppst->pam2[0][i][ix_i] = ppst->pam2[0][i][ix_l] =	     (ppst->pam2[0][i][ix_l]+ppst->pam2[0][i][ix_i]+1)/2;	   ppst->pam2[0][ix_i][i] = ppst->pam2[0][ix_l][i] =	     (ppst->pam2[0][ix_i][i]+ppst->pam2[0][ix_l][i]+1)/2;	 }       }     }   }   /*   print_pam(ppst);   */   /* once we have a complete pam matrix, we can calculate Lambda and K       for "average" sequences */   kar_p = NULL;   init_karlin_a(ppst, aa0_f, &kar_p);   do_karlin_a(ppst->pam2[0], ppst, aa0_f,	       kar_p, &m_msg->Lambda, &m_msg->K, &m_msg->H);   free(kar_p);#if defined(FASTF) || defined(FASTS) || defined(FASTM)   if (ppst->ext_sq_set) {     fprintf(stderr," -S not available on [t]fast[fs]\n");     ppst->ext_sq_set = 0;     /* reset sascii to ignore -S, map lc */     init_ascii(0,lascii,0);   }#endif}/* this function is left over from the older FASTA format scoring   matrices that allowed additional parameters (bktup, bkfact) to be   set in the scoring matrix.  It is no longer used.  A modern version   would set parameters based on lambda and K.*//*voidf_initpam (line, ppst)char   *line;struct pstruct *ppst;{   if (sscanf (line, " %d %d %d %d %d %d %d", &ppst->param_u.fa.scfact,	       &ppst->param_u.fa.bestoff, &ppst->param_u.fa.bestscale,	       &ppst->param_u.fa.bkfact, &ppst->param_u.fa.bktup,	       &ppst->param_u.fa.bestmax, &ppst->histint) != 7)   {      printf ("  bestcut parameters - bad format\n");      exit (1);   }}*//* alloc_pam2 creates a profile structure */int **alloc_pam2p(int **pam2p, int len, int nsq) {  int i, pam2p_len;  int *pam2pp;  if (pam2p == NULL) {    if ((pam2p = (int **)calloc(len,sizeof(int *)))==NULL) {      fprintf(stderr," Cannot allocate pam2p: %d\n",len);      return NULL;    }    if((pam2p[0] = (int *)calloc((nsq+1)*len,sizeof(int)))==NULL) {      fprintf(stderr, "Cannot allocate pam2p[0]: %d\n", (nsq+1)*len);      free(pam2p);      return NULL;    }  }  else {    pam2p_len = (nsq+1)*len*sizeof(int);    pam2pp = pam2p[0];    if ((pam2pp = (int *)realloc(pam2pp,pam2p_len))==NULL) {      fprintf(stderr,	      "Cannot reallocate pam2p[0]: %d\n", (nsq+1)*len*sizeof(int));      return NULL;    }    memset(pam2pp,0,pam2p_len);    if ((pam2p = (int **)realloc(pam2p,len*sizeof(int *)))==NULL) {      fprintf(stderr," Cannot reallocate pam2p: %d\n",len);      return NULL;    }    pam2p[0] = pam2pp;  }  for (i=1; i<len; i++) {    pam2p[i] = pam2p[0] + (i*(nsq+1));  }  return pam2p;}void free_pam2p(int **pam2p) {  if (pam2p) {    free(pam2p[0]);    free(pam2p);  }}/* sortbest has now become comparison function specific so that we can use   a different comparison for fasts/f */#if !defined(FASTS) && !defined (FASTF) && !defined(FASTM)#ifndef PCOMPLIBvoidqshuffle() {}#endif#ifndef LALIGNintlast_calc(#ifndef PCOMPLIB	  unsigned char *aa0, unsigned char *aa1, int maxn,#endif	  struct beststr **bestp_arr, int nbest,	  struct mngmsg m_msg, struct pstruct *ppst#ifdef PCOMPLIB	  , struct qmng_str *qm_msp#else	  , void **f_str#endif	  , void *pstat_str){  return nbest;}#endifvoid sortbest (bptr, nbest, irelv)struct beststr **bptr;int nbest, irelv;{    int gap, i, j;    struct beststr *tmp;    for (gap = nbest/2; gap > 0; gap /= 2)	for (i = gap; i < nbest; i++)	    for (j = i - gap; j >= 0; j-= gap) {	      if (bptr[j]->rst.score[irelv] >= bptr[j + gap]->rst.score[irelv]) break;	      tmp = bptr[j];	      bptr[j] = bptr[j + gap];	      bptr[j + gap] = tmp;	    }}void show_aux(FILE *fp, struct beststr *bptr) {}void header_aux(FILE *fp) {}#elsevoid sortbest (bptr, nbest, irelv)struct beststr **bptr;int nbest, irelv;{    int gap, i, j;    struct beststr *tmp;    for (gap = nbest/2; gap > 0; gap /= 2)	for (i = gap; i < nbest; i++)	    for (j = i - gap; j >= 0; j-= gap) {	      if (bptr[j]->rst.escore < bptr[j + gap]->rst.escore) break;	      tmp = bptr[j];	      bptr[j] = bptr[j + gap];	      bptr[j + gap] = tmp;	    }}#if defined(FASTS) || defined(FASTM)#ifndef PCOMPLIB/* this shuffle is for FASTS  *//* convert ',' -> '\0', shuffle each of the substrings */voidqshuffle(unsigned char *aa0, int n0, int nm0) {  unsigned char **aa0start, *aap, tmp;  int i,j,k, ns;  if ((aa0start=(unsigned char **)calloc(nm0+1,					 sizeof(unsigned char *)))==NULL) {    fprintf(stderr,"cannot calloc for qshuffle %d\n",nm0);    exit(1);  }  aa0start[0]=aa0;  for (k=1,i=0; i<n0; i++) {    if (aa0[i]==EOSEQ || aa0[i]==ESS) {      aa0[i]='\0';      aa0start[k++] = &aa0[i+1];    }  }    /* aa0start has the beginning of each substring */  for (k=0; k<nm0; k++) {    aap=aa0start[k];    ns = strlen((char *)aap);    for (i=ns; i>1; i--) {      j = nrand(i);      tmp = aap[j];      aap[j] = aap[i-1];      aap[i-1] = tmp;    }    aap[ns] = 0;  }  for (k=1; k<nm0; k++) {/*  aap = aa0start[k];    while (*aap) fputc(pst.sq[*aap++],stderr);    fputc('\n',stderr);*/    aa0start[k][-1]=ESS;  }  free(aa0start);}#endif#endif#ifdef FASTF#ifndef PCOMPLIBvoid qshuffle(unsigned char *aa0, int n0, int nm0) {  int i, j, k, nmpos;  unsigned char tmp;  int nmoff;    nmoff = (n0 - nm0 - 1)/nm0 + 1;  for (i = nmoff-1 ; i > 0 ; i--) {    /* j = nrand(i); if (i == j) continue;*/       /* shuffle columns */     j = (nmoff -1 ) - i;     if (i <= j) break; /* reverse columns */    /* swap all i'th column residues for all j'th column residues */    for(nmpos = 0, k = 0 ; k < nm0 ; k++, nmpos += nmoff+1 ) {      tmp = aa0[nmpos + i];      aa0[nmpos + i] = aa0[nmpos + j];      aa0[nmpos + j] = tmp;    }  }}#endif#endif/* show additional best_str values */void show_aux(FILE *fp, struct beststr *bptr) {  fprintf(fp," %2d %3d",bptr->rst.segnum,bptr->rst.seglen);}void header_aux(FILE *fp) {  fprintf(fp, " sn  sl");}#endifvoidfill_pam(int **pam2p, int n0, int nsq, double **freq2d, double scale) {  int i, j;  double freq;  /* fprintf(stderr, "scale: %g\n", scale); */    /* now fill in the pam matrix: */  for (i = 0 ; i < n0 ; i++) {    for (j = 1 ; j <=20 ; j++) {      freq = scale * freq2d[i][j-1];      if ( freq < 0.0) freq -= 0.5;      else freq += 0.5;      pam2p[i][j] = (int)(freq);    }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -