📄 prfalign.c

📁 是有关基因比对的经典算法的实现。这对于初学计算生物学的人是非常重要的算法。
💻 C
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
   ta=ckfree((void *)ta);

   return(mask);
}

static lint prfscore(sint n, sint m)
{
   sint    ix;
   lint  score;

   score = 0.0;
   for (ix=0; ix<=max_aa; ix++)
     {
         score += (profile1[n][ix] * profile2[m][ix]);
     }
   score += (profile1[n][gap_pos1] * profile2[m][gap_pos1]);
   score += (profile1[n][gap_pos2] * profile2[m][gap_pos2]);
   return(score/10);
   
}

static void ptracepath(sint *alen)
{
    sint i,j,k,pos,to_do;

    pos = 0;

    to_do=print_ptr-1;

    for(i=1;i<=to_do;++i) {
if (debug>1) fprintf(stdout,"%d ",(pint)displ[i]);
            if(displ[i]==0) {
                    aln_path1[pos]=2;
                    aln_path2[pos]=2;
                    ++pos;
            }
            else {
                    if((k=displ[i])>0) {
                            for(j=0;j<=k-1;++j) {
                                    aln_path2[pos+j]=2;
                                    aln_path1[pos+j]=1;
                            }
                            pos += k;
                    }
                    else {
                            k = (displ[i]<0) ? displ[i] * -1 : displ[i];
                            for(j=0;j<=k-1;++j) {
                                    aln_path1[pos+j]=2;
                                    aln_path2[pos+j]=1;
                            }
                            pos += k;
                    }
            }
    }
if (debug>1) fprintf(stdout,"\n");

   (*alen) = pos;

}

static void pdel(sint k)
{
        if(last_print<0)
                last_print = displ[print_ptr-1] -= k;
        else
                last_print = displ[print_ptr++] = -(k);
}

static void padd(sint k)
{

        if(last_print<0) {
                displ[print_ptr-1] = k;
                displ[print_ptr++] = last_print;
        }
        else
                last_print = displ[print_ptr++] = k;
}

static void palign(void)
{
        displ[print_ptr++] = last_print = 0;
}


static lint pdiff(sint A,sint B,sint M,sint N,sint go1, sint go2)
{
        sint midi,midj,type;
        lint midh;

        static lint t, tl, g, h;

{		static sint i,j;
        static lint hh, f, e, s;

/* Boundary cases: M <= 1 or N == 0 */
if (debug>2) fprintf(stdout,"A %d B %d M %d N %d midi %d go1 %d go2 %d\n", 
(pint)A,(pint)B,(pint)M,(pint)N,(pint)M/2,(pint)go1,(pint)go2);

/* if sequence B is empty....                                            */

        if(N<=0)  {

/* if sequence A is not empty....                                        */

                if(M>0) {

/* delete residues A[1] to A[M]                                          */

                        pdel(M);
                }
                return(-gap_penalty1(A,B,M));
        }

/* if sequence A is empty....                                            */

        if(M<=1) {
                if(M<=0) {

/* insert residues B[1] to B[N]                                          */

                        padd(N);
                        return(-gap_penalty2(A,B,N));
                }

/* if sequence A has just one residue....                                */

                if (go1 == 0)
                	midh =  -gap_penalty1(A+1,B+1,N);
                else
                	midh =  -gap_penalty2(A+1,B,1)-gap_penalty1(A+1,B+1,N);
                midj = 0;
                for(j=1;j<=N;j++) {
                        hh = -gap_penalty1(A,B+1,j-1) + prfscore(A+1,B+j)
                            -gap_penalty1(A+1,B+j+1,N-j);
                        if(hh>midh) {
                                midh = hh;
                                midj = j;
                        }
                }

                if(midj==0) {
                        padd(N);
                        pdel(1);
                }
                else {
                        if(midj>1) padd(midj-1);
                        palign();
                        if(midj<N) padd(N-midj);
                }
                return midh;
        }


/* Divide sequence A in half: midi */

        midi = M / 2;

/* In a forward phase, calculate all HH[j] and HH[j] */

        HH[0] = 0.0;
        t = -open_penalty1(A,B+1);
        tl = -ext_penalty1(A,B+1);
        for(j=1;j<=N;j++) {
                HH[j] = t = t+tl;
                DD[j] = t-open_penalty2(A+1,B+j);
        }

		if (go1 == 0) t = 0;
		else t = -open_penalty2(A+1,B);
        tl = -ext_penalty2(A+1,B);
        for(i=1;i<=midi;i++) {
                s = HH[0];
                HH[0] = hh = t = t+tl;
                f = t-open_penalty1(A+i,B+1);

                for(j=1;j<=N;j++) {
                	g = open_penalty1(A+i,B+j);
                	h = ext_penalty1(A+i,B+j);
                        if ((hh=hh-g-h) > (f=f-h)) f=hh;
                	g = open_penalty2(A+i,B+j);
                	h = ext_penalty2(A+i,B+j);
                        if ((hh=HH[j]-g-h) > (e=DD[j]-h)) e=hh;
                        hh = s + prfscore(A+i, B+j);
                        if (f>hh) hh = f;
                        if (e>hh) hh = e;

                        s = HH[j];
                        HH[j] = hh;
                        DD[j] = e;

                }
        }

        DD[0]=HH[0];

/* In a reverse phase, calculate all RR[j] and SS[j] */

        RR[N]=0.0;
        tl = 0.0;
        for(j=N-1;j>=0;j--) {
                g = -open_penalty1(A+M,B+j+1);
                tl -= ext_penalty1(A+M,B+j+1);
                RR[j] = g+tl;
                SS[j] = RR[j]-open_penalty2(A+M,B+j);
                gS[j] = open_penalty2(A+M,B+j);
        }

        tl = 0.0;
        for(i=M-1;i>=midi;i--) {
                s = RR[N];
                if (go2 == 0) g = 0;
                else g = -open_penalty2(A+i+1,B+N);
                tl -= ext_penalty2(A+i+1,B+N);
                RR[N] = hh = g+tl;
                t = open_penalty1(A+i,B+N);
                f = RR[N]-t;

                for(j=N-1;j>=0;j--) {
                	g = open_penalty1(A+i,B+j+1);
                	h = ext_penalty1(A+i,B+j+1);
                        if ((hh=hh-g-h) > (f=f-h-g+t)) f=hh;
                        t = g;
                	g = open_penalty2(A+i+1,B+j);
                	h = ext_penalty2(A+i+1,B+j);
                        hh=RR[j]-g-h;
                        if (i==(M-1)) {
				 e=SS[j]-h;
			}
                        else {
				e=SS[j]-h-g+open_penalty2(A+i+2,B+j);
				gS[j] = g;
			}
                        if (hh > e) e=hh;
                        hh = s + prfscore(A+i+1, B+j+1);
                        if (f>hh) hh = f;
                        if (e>hh) hh = e;

                        s = RR[j];
                        RR[j] = hh;
                        SS[j] = e;

                }
        }
        SS[N]=RR[N];
        gS[N] = open_penalty2(A+midi+1,B+N);

/* find midj, such that HH[j]+RR[j] or DD[j]+SS[j]+gap is the maximum */

        midh=HH[0]+RR[0];
        midj=0;
        type=1;
        for(j=0;j<=N;j++) {
                hh = HH[j] + RR[j];
                if(hh>=midh)
                        if(hh>midh || (HH[j]!=DD[j] && RR[j]==SS[j])) {
                                midh=hh;
                                midj=j;
                        }
        }

        for(j=N;j>=0;j--) {
                hh = DD[j] + SS[j] + gS[j];
                if(hh>midh) {
                        midh=hh;
                        midj=j;
                        type=2;
                }
        }
}

/* Conquer recursively around midpoint                                   */


        if(type==1) {             /* Type 1 gaps  */
if (debug>2) fprintf(stdout,"Type 1,1: midj %d\n",(pint)midj);
                pdiff(A,B,midi,midj,go1,1);
if (debug>2) fprintf(stdout,"Type 1,2: midj %d\n",(pint)midj);
                pdiff(A+midi,B+midj,M-midi,N-midj,1,go2);
        }
        else {
if (debug>2) fprintf(stdout,"Type 2,1: midj %d\n",(pint)midj);
                pdiff(A,B,midi-1,midj,go1, 0);
                pdel(2);
if (debug>2) fprintf(stdout,"Type 2,2: midj %d\n",(pint)midj);
                pdiff(A+midi+1,B+midj,M-midi-1,N-midj,0,go2);
        }

        return midh;       /* Return the score of the best alignment */
}

/* calculate the score for opening a gap at residues A[i] and B[j]       */

static sint open_penalty1(sint i, sint j)
{
   sint g;

   if (!endgappenalties &&(i==0 || i==prf_length1)) return(0);

   g = profile2[j][GAPCOL] + profile1[i][GAPCOL];
   return(g);
}

/* calculate the score for extending an existing gap at A[i] and B[j]    */

static sint ext_penalty1(sint i, sint j)
{
   sint h;

   if (!endgappenalties &&(i==0 || i==prf_length1)) return(0);

   h = profile2[j][LENCOL];
   return(h);
}

/* calculate the score for a gap of length k, at residues A[i] and B[j]  */

static sint gap_penalty1(sint i, sint j, sint k)
{
   sint ix;
   sint gp;
   sint g, h = 0;

   if (k <= 0) return(0);
   if (!endgappenalties &&(i==0 || i==prf_length1)) return(0);

   g = profile2[j][GAPCOL] + profile1[i][GAPCOL];
   for (ix=0;ix<k && ix+j<prf_length2;ix++)
      h = profile2[ix+j][LENCOL];

   gp = g + h * k;
   return(gp);
}
/* calculate the score for opening a gap at residues A[i] and B[j]       */

static sint open_penalty2(sint i, sint j)
{
   sint g;

   if (!endgappenalties &&(j==0 || j==prf_length2)) return(0);

   g = profile1[i][GAPCOL] + profile2[j][GAPCOL];
   return(g);
}

/* calculate the score for extending an existing gap at A[i] and B[j]    */

static sint ext_penalty2(sint i, sint j)
{
   sint h;

   if (!endgappenalties &&(j==0 || j==prf_length2)) return(0);

   h = profile1[i][LENCOL];
   return(h);
}

/* calculate the score for a gap of length k, at residues A[i] and B[j]  */

static sint gap_penalty2(sint i, sint j, sint k)
{
   sint ix;
   sint gp;
   sint g, h = 0;

   if (k <= 0) return(0);
   if (!endgappenalties &&(j==0 || j==prf_length2)) return(0);

   g = profile1[i][GAPCOL] + profile2[j][GAPCOL];
   for (ix=0;ix<k && ix+i<prf_length1;ix++)
      h = profile1[ix+i][LENCOL];

   gp = g + h * k;
   return(gp);
}
上一页 1 23
💿 文件大小 848 K
👤 上传用户 nassdaq
📂 所属分类 *行业应用
🏷️ 相关标签

#算法 #基因 #生物学 #计算
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -