📄 initfa.c
字号:
if (!del_set) {#ifdef OLD_FASTA_GAP ppst->gdelval = -16; /* def. del penalty */#else ppst->gdelval = -12; /* def. open penalty */#endif } if (!gap_set) ppst->ggapval = -4; /* def. gap penalty */ if (pgm_def_arr[pgm_id].ktup > 0) { /* these parameters are used to scale optcut, they should be replaced by statistically based parameters */ if (!wid_set) ppst->param_u.fa.optwid = 16; ppst->param_u.fa.bestscale = 80; ppst->param_u.fa.bkfact = 5; ppst->param_u.fa.scfact = 1; ppst->param_u.fa.bktup = 6; ppst->param_u.fa.bestmax = 80; ppst->param_u.fa.bestoff = 45; if (!sw_flag_set) { ppst->sw_flag = 0; strncpy(m_msg->f_id1,"bs",sizeof(m_msg->f_id1)); strncpy(m_msg->alabel, align_label[1], sizeof(m_msg->alabel)); } /* largest ktup */ mktup = 6; if (ppst->param_u.fa.pamfact >= 0) ppst->param_u.fa.pamfact = 0; if (ppst->param_u.fa.ktup < 0) ppst->param_u.fa.ktup = -ppst->param_u.fa.bktup; } ppst->nsq = nnt; ppst->nsqx = nntx; for (i=0; i<=ppst->nsqx; i++) { ppst->hsq[i] = hnt[i]; ppst->sq[i] = nt[i]; ppst->hsqx[i] = hntx[i]; ppst->sqx[i] = ntx[i]; } ppst->sq[ppst->nsqx+1] = ppst->sqx[ppst->nsqx+1] = '\0'; if (!ppst->pam_set) { if (ppst->p_d_set) mk_n_pam(npam,nnt,ppst->p_d_mat,ppst->p_d_mis);#if !defined(FASTS) && !defined(FASTM) else if (ppst->pamfile[0]=='\0' || strncmp(ppst->pamfile,"BL50",4)==0) { strncpy (ppst->pamfile, "+5/-4", sizeof(ppst->pamfile)); }#else else if (strncmp(ppst->pamfile,"MD20",4)==0) { strncpy (ppst->pamfile, "+2/-2", sizeof(ppst->pamfile)); ppst->p_d_mat = +2; ppst->p_d_mis = -2; mk_n_pam(npam,nnt,ppst->p_d_mat,ppst->p_d_mis); }#endif pam = npam; } strncpy (m_msg->sqnam, "nt",sizeof(m_msg->sqnam)); strncpy (m_msg->sqtype, "DNA",sizeof(m_msg->sqtype)); } /* end DNA reset */ else { /* other parameters for protein comparison */ if (pgm_def_arr[pgm_id].ktup > 0) { if (!wid_set) { if (ppst->param_u.fa.ktup==1) ppst->param_u.fa.optwid = 32; else ppst->param_u.fa.optwid = 16; } } if (!shift_set) {ppst->gshift = pgm_def_arr[pgm_id].gshift;} if (!subs_set) {ppst->gsubs = pgm_def_arr[pgm_id].hshift;} }}/* query_parm() this function asks for any additional parameters that have not been provided. Could be null. */voidquery_parm (struct mngmsg *m_msp, struct pstruct *ppst){ char qline[40]; if (pgm_def_arr[ppst->pgm_id].ktup > 0) { if (ppst->param_u.fa.ktup < 0) ppst->param_u.fa.ktup = -ppst->param_u.fa.ktup; if (ppst->param_u.fa.ktup == 0) { printf (" ktup? (1 to %d) [%d] ", mktup, ppst->param_u.fa.bktup); if (fgets (qline, sizeof(qline), stdin) == NULL) exit (0); else sscanf(qline,"%d",&ppst->param_u.fa.ktup); } if (ppst->param_u.fa.ktup == 0) ppst->param_u.fa.ktup = ppst->param_u.fa.bktup; else ktup_set = 1; }#if defined(PRSS) if (m_msp->shuff_max < 10) m_msp->shuff_max = MAX_RSTATS; if (!mshuff_set) { printf(" number of shuffles [%d]? ",m_msp->shuff_max); fflush(stdout); if (fgets (qline, sizeof(qline), stdin) == NULL) exit (0); else sscanf(qline,"%d",&m_msp->shuff_max); } if (ppst->zs_win == 0) { printf (" local (window) (w) or uniform (u) shuffle [u]? "); if (fgets (qline, sizeof(qline), stdin) == NULL) exit (0); else if (qline[0]=='w' || qline[0]=='W') { m_msp->shuff_wid = 20; printf(" local shuffle window size [%d]? ",m_msp->shuff_wid); if (fgets (qline, sizeof(qline), stdin) == NULL) exit (0); else sscanf(qline,"%d",&m_msp->shuff_wid); } }#endif}/* last_init() cannot look at aa0, n0, because it is only run once, it is not run before each new aa0 search */voidlast_init (struct mngmsg *m_msg, struct pstruct *ppst#ifdef PCOMPLIB ,int nnodes#endif ){ int ix_l, ix_i, i, pgm_id; double *kar_p; double aa0_f[MAXSQ]; pgm_id = ppst->pgm_id;#ifdef LALIGN m_msg->do_showbest = 0; m_msg->quiet = 1;#endif#if defined(FASTF) || defined(FASTS) || defined(FASTM) m_msg->nohist = 1; m_msg->shuff_max = 2000;#ifndef PCOMPLIB ppst->shuff_node = m_msg->shuff_max/max_workers;#else ppst->shuff_node = m_msg->shuff_max/nnodes;#endif#endif if (m_msg->aln.llen < 1) { m_msg->aln.llen = 60; }#ifndef PCOMPLIB#if defined(FASTX) || defined(FASTY) || defined(TFAST) /* set up translation tables: faatran.c */ aainit(ppst->tr_type,ppst->debug_lib);#endif#endif/* a sanity check */#if !defined(TFAST) if (m_msg->revcomp && m_msg->qdnaseq!=SEQT_DNA && m_msg->qdnaseq!=SEQT_RNA) { fprintf(stderr," cannot reverse complement protein\n"); m_msg->revcomp = 0; }#endif if (pgm_def_arr[pgm_id].ktup > 0) { if (ppst->param_u.fa.ktup < 0) ppst->param_u.fa.ktup = -ppst->param_u.fa.ktup; if (ppst->param_u.fa.ktup < 1 || ppst->param_u.fa.ktup > mktup) { fprintf(stderr," warning ktup = %d out of range [1..%d], reset to %d\n", ppst->param_u.fa.ktup, mktup, ppst->param_u.fa.bktup); ppst->param_u.fa.ktup = ppst->param_u.fa.bktup; } } if (pgm_id == TFA_PID) { m_msg->revcomp *= 3; if (m_msg->nframe == 3) m_msg->nframe += m_msg->revcomp; } else if (pgm_id == TFX_PID || pgm_id == TFY_PID) { if (m_msg->nframe == 1) m_msg->nframe += m_msg->revcomp; }#if !defined(TFAST) /* for fasta/fastx searches, itt iterates the the query strand */ m_msg->nitt1 = m_msg->qframe-1;#else /* for tfasta/tfastxy searches, itt iterates the library frames */ m_msg->nitt1 = m_msg->nframe-1;#endif if (pgm_def_arr[pgm_id].ktup > 0) { /* its FASTA, not SSEARCH */ if (ppst->param_u.fa.ktup>=2 && !wid_set) { ppst->param_u.fa.optwid=16; switch (pgm_id) { case FA_PID: m_msg->thr_fact = 32; break; case FX_PID: case FY_PID: m_msg->thr_fact = 16; break; case TFA_PID: case TFX_PID: case TFY_PID: m_msg->thr_fact = 8; break; default: m_msg->thr_fact = 4; } } else { m_msg->thr_fact = 4;} } else {#if !defined(SW_ALTIVEC) && !defined(SW_SSE2) m_msg->thr_fact = 1; /* unvectorized SSEARCH */#else m_msg->thr_fact = 16; /* vectorized SSEARCH */#endif }#if defined(PRSS) if (m_msg->shuff_max < 10) m_msg->shuff_max = MAX_RSTATS; if (ppst->zsflag < 10) ppst->zsflag += 10; if (ppst->zs_win > 0) { m_msg->shuff_wid = ppst->zs_win; }#endif if (pgm_def_arr[ppst->pgm_id].ktup > 0) { if (ppst->param_u.fa.iniflag) { ppst->score_ix = 1; strncpy (m_msg->label, "initn init1", sizeof(m_msg->label)); } else if (ppst->param_u.fa.optflag) { ppst->score_ix = 2; m_msg->stages = 1; } } if (!ppst->have_pam2) { alloc_pam (MAXSQ, MAXSQ, ppst); init_pam2(ppst); } init_pamx(ppst); if (ppst->pam_ms) { if (m_msg->qdnaseq == SEQT_PROT) { /* code to make 'L'/'I' identical scores */ ix_l = pascii['L']; ix_i = pascii['I']; ppst->pam2[0][ix_l][ix_i] = ppst->pam2[0][ix_i][ix_l] = ppst->pam2[0][ix_l][ix_l] = ppst->pam2[0][ix_i][ix_i] = max(ppst->pam2[0][ix_l][ix_l],ppst->pam2[0][ix_i][ix_i]); for (i=1; i<=ppst->nsq; i++) { ppst->pam2[0][i][ix_i] = ppst->pam2[0][i][ix_l] = max(ppst->pam2[0][i][ix_l],ppst->pam2[0][i][ix_i]); ppst->pam2[0][ix_i][i] = ppst->pam2[0][ix_l][i] = max(ppst->pam2[0][ix_i][i],ppst->pam2[0][ix_l][i]); } /* code to make 'Q'/'K' identical scores */ if (!shift_set) { ix_l = pascii['Q']; ix_i = pascii['K']; ppst->pam2[0][ix_l][ix_i] = ppst->pam2[0][ix_i][ix_l] = ppst->pam2[0][ix_l][ix_l] = ppst->pam2[0][ix_i][ix_i] = (ppst->pam2[0][ix_l][ix_l]+ppst->pam2[0][ix_i][ix_i]+1)/2; for (i=1; i<=ppst->nsq; i++) { ppst->pam2[0][i][ix_i] = ppst->pam2[0][i][ix_l] = (ppst->pam2[0][i][ix_l]+ppst->pam2[0][i][ix_i]+1)/2; ppst->pam2[0][ix_i][i] = ppst->pam2[0][ix_l][i] = (ppst->pam2[0][ix_i][i]+ppst->pam2[0][ix_l][i]+1)/2; } } } } /* print_pam(ppst); */ /* once we have a complete pam matrix, we can calculate Lambda and K for "average" sequences */ kar_p = NULL; init_karlin_a(ppst, aa0_f, &kar_p); do_karlin_a(ppst->pam2[0], ppst, aa0_f, kar_p, &m_msg->Lambda, &m_msg->K, &m_msg->H); free(kar_p);#if defined(FASTF) || defined(FASTS) || defined(FASTM) if (ppst->ext_sq_set) { fprintf(stderr," -S not available on [t]fast[fs]\n"); ppst->ext_sq_set = 0; /* reset sascii to ignore -S, map lc */ init_ascii(0,lascii,0); }#endif}/* this function is left over from the older FASTA format scoring matrices that allowed additional parameters (bktup, bkfact) to be set in the scoring matrix. It is no longer used. A modern version would set parameters based on lambda and K.*//*voidf_initpam (line, ppst)char *line;struct pstruct *ppst;{ if (sscanf (line, " %d %d %d %d %d %d %d", &ppst->param_u.fa.scfact, &ppst->param_u.fa.bestoff, &ppst->param_u.fa.bestscale, &ppst->param_u.fa.bkfact, &ppst->param_u.fa.bktup, &ppst->param_u.fa.bestmax, &ppst->histint) != 7) { printf (" bestcut parameters - bad format\n"); exit (1); }}*//* alloc_pam2 creates a profile structure */int **alloc_pam2p(int **pam2p, int len, int nsq) { int i, pam2p_len; int *pam2pp; if (pam2p == NULL) { if ((pam2p = (int **)calloc(len,sizeof(int *)))==NULL) { fprintf(stderr," Cannot allocate pam2p: %d\n",len); return NULL; } if((pam2p[0] = (int *)calloc((nsq+1)*len,sizeof(int)))==NULL) { fprintf(stderr, "Cannot allocate pam2p[0]: %d\n", (nsq+1)*len); free(pam2p); return NULL; } } else { pam2p_len = (nsq+1)*len*sizeof(int); pam2pp = pam2p[0]; if ((pam2pp = (int *)realloc(pam2pp,pam2p_len))==NULL) { fprintf(stderr, "Cannot reallocate pam2p[0]: %d\n", (nsq+1)*len*sizeof(int)); return NULL; } memset(pam2pp,0,pam2p_len); if ((pam2p = (int **)realloc(pam2p,len*sizeof(int *)))==NULL) { fprintf(stderr," Cannot reallocate pam2p: %d\n",len); return NULL; } pam2p[0] = pam2pp; } for (i=1; i<len; i++) { pam2p[i] = pam2p[0] + (i*(nsq+1)); } return pam2p;}void free_pam2p(int **pam2p) { if (pam2p) { free(pam2p[0]); free(pam2p); }}/* sortbest has now become comparison function specific so that we can use a different comparison for fasts/f */#if !defined(FASTS) && !defined (FASTF) && !defined(FASTM)#ifndef PCOMPLIBvoidqshuffle() {}#endif#ifndef LALIGNintlast_calc(#ifndef PCOMPLIB unsigned char *aa0, unsigned char *aa1, int maxn,#endif struct beststr **bestp_arr, int nbest, struct mngmsg m_msg, struct pstruct *ppst#ifdef PCOMPLIB , struct qmng_str *qm_msp#else , void **f_str#endif , void *pstat_str){ return nbest;}#endifvoid sortbest (bptr, nbest, irelv)struct beststr **bptr;int nbest, irelv;{ int gap, i, j; struct beststr *tmp; for (gap = nbest/2; gap > 0; gap /= 2) for (i = gap; i < nbest; i++) for (j = i - gap; j >= 0; j-= gap) { if (bptr[j]->rst.score[irelv] >= bptr[j + gap]->rst.score[irelv]) break; tmp = bptr[j]; bptr[j] = bptr[j + gap]; bptr[j + gap] = tmp; }}void show_aux(FILE *fp, struct beststr *bptr) {}void header_aux(FILE *fp) {}#elsevoid sortbest (bptr, nbest, irelv)struct beststr **bptr;int nbest, irelv;{ int gap, i, j; struct beststr *tmp; for (gap = nbest/2; gap > 0; gap /= 2) for (i = gap; i < nbest; i++) for (j = i - gap; j >= 0; j-= gap) { if (bptr[j]->rst.escore < bptr[j + gap]->rst.escore) break; tmp = bptr[j]; bptr[j] = bptr[j + gap]; bptr[j + gap] = tmp; }}#if defined(FASTS) || defined(FASTM)#ifndef PCOMPLIB/* this shuffle is for FASTS *//* convert ',' -> '\0', shuffle each of the substrings */voidqshuffle(unsigned char *aa0, int n0, int nm0) { unsigned char **aa0start, *aap, tmp; int i,j,k, ns; if ((aa0start=(unsigned char **)calloc(nm0+1, sizeof(unsigned char *)))==NULL) { fprintf(stderr,"cannot calloc for qshuffle %d\n",nm0); exit(1); } aa0start[0]=aa0; for (k=1,i=0; i<n0; i++) { if (aa0[i]==EOSEQ || aa0[i]==ESS) { aa0[i]='\0'; aa0start[k++] = &aa0[i+1]; } } /* aa0start has the beginning of each substring */ for (k=0; k<nm0; k++) { aap=aa0start[k]; ns = strlen((char *)aap); for (i=ns; i>1; i--) { j = nrand(i); tmp = aap[j]; aap[j] = aap[i-1]; aap[i-1] = tmp; } aap[ns] = 0; } for (k=1; k<nm0; k++) {/* aap = aa0start[k]; while (*aap) fputc(pst.sq[*aap++],stderr); fputc('\n',stderr);*/ aa0start[k][-1]=ESS; } free(aa0start);}#endif#endif#ifdef FASTF#ifndef PCOMPLIBvoid qshuffle(unsigned char *aa0, int n0, int nm0) { int i, j, k, nmpos; unsigned char tmp; int nmoff; nmoff = (n0 - nm0 - 1)/nm0 + 1; for (i = nmoff-1 ; i > 0 ; i--) { /* j = nrand(i); if (i == j) continue;*/ /* shuffle columns */ j = (nmoff -1 ) - i; if (i <= j) break; /* reverse columns */ /* swap all i'th column residues for all j'th column residues */ for(nmpos = 0, k = 0 ; k < nm0 ; k++, nmpos += nmoff+1 ) { tmp = aa0[nmpos + i]; aa0[nmpos + i] = aa0[nmpos + j]; aa0[nmpos + j] = tmp; } }}#endif#endif/* show additional best_str values */void show_aux(FILE *fp, struct beststr *bptr) { fprintf(fp," %2d %3d",bptr->rst.segnum,bptr->rst.seglen);}void header_aux(FILE *fp) { fprintf(fp, " sn sl");}#endifvoidfill_pam(int **pam2p, int n0, int nsq, double **freq2d, double scale) { int i, j; double freq; /* fprintf(stderr, "scale: %g\n", scale); */ /* now fill in the pam matrix: */ for (i = 0 ; i < n0 ; i++) { for (j = 1 ; j <=20 ; j++) { freq = scale * freq2d[i][j-1]; if ( freq < 0.0) freq -= 0.5; else freq += 0.5; pam2p[i][j] = (int)(freq); }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -