⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 prfalign.c

📁 是有关基因比对的经典算法的实现。这对于初学计算生物学的人是非常重要的算法。
💻 C
📖 第 1 页 / 共 3 页
字号:
        }
       else if (strcmp(mtrxname, "id") == 0)
        {
           matptr = idmat;
           mat_xref = def_aa_xref;
        }
       else if(user_series)
        {
           matptr=NULL;
	   found=FALSE;
	   for(i=0;i<matseries.nmat;i++)
		if(pcid>=matseries.mat[i].llimit && pcid<=matseries.mat[i].ulimit)
		{
			j=i;
			found=TRUE;
			break;
		}
	   if(found==FALSE)
	   {
		if(!error_given)
		warning(
"\nSeries matrix not found for sequence percent identity = %d.\n"
"(Using first matrix in series as a default.)\n"
"This alignment may not be optimal!\n"
"SUGGESTION: Check your matrix series input file and try again.",(int)pcid);
		error_given=TRUE;
		j=0;
	   }
if (debug>0) fprintf(stdout,"pcid %d  matrix %d\n",(pint)pcid,(pint)j+1);

           matptr = matseries.mat[j].matptr;
           mat_xref = matseries.mat[j].aa_xref;
/* this gives a scale of 0.5 for pcid=llimit and 1.0 for pcid=ulimit */
           scale=0.5+(pcid-matseries.mat[j].llimit)/((matseries.mat[j].ulimit-matseries.mat[j].llimit)*2.0);
        }
       else 
        {
           matptr = usermat;
           mat_xref = aa_xref;
        }
if(debug>0) fprintf(stdout,"pcid %3.1f scale %3.1f\n",pcid,scale);
      	maxres = get_matrix(matptr, mat_xref, matrix, negative, int_scale);
      if (maxres == 0)
        {
           fprintf(stdout,"Error: matrix %s not found\n", mtrxname);
           return(-1);
        }

          if (negative) {
              gapcoef1 = gapcoef2 = 100.0 * (float)(gap_open);
              lencoef1 = lencoef2 = 100.0 * gap_extend;
	  }
          else {
          if (mat_avscore <= 0)
              gapcoef1 = gapcoef2 = 100.0 * (float)(gap_open + logmin);
	  else
              gapcoef1 = gapcoef2 = scale * mat_avscore * (float)(gap_open/(logdiff*logmin));
              lencoef1 = lencoef2 = 100.0 * gap_extend;
	 }
    }
if (debug>0)
{
fprintf(stdout,"matavscore %d\n",mat_avscore);
fprintf(stdout,"Gap Open1 %d  Gap Open2 %d  Gap Extend1 %d   Gap Extend2 %d\n",
   (pint)gapcoef1,(pint)gapcoef2, (pint)lencoef1,(pint)lencoef2);
fprintf(stdout,"Matrix  %s\n", mtrxname);
}

  profile1 = (sint **) ckalloc( (prf_length1+2) * sizeof (sint *) );
  for(i=0; i<prf_length1+2; i++)
       profile1[i] = (sint *) ckalloc( (LENCOL+2) * sizeof(sint) );

  profile2 = (sint **) ckalloc( (prf_length2+2) * sizeof (sint *) );
  for(i=0; i<prf_length2+2; i++)
       profile2[i] = (sint *) ckalloc( (LENCOL+2) * sizeof(sint) );

/*
  calculate the Gap Coefficients.
*/
     gaps = (sint *) ckalloc( (max_aln_length+1) * sizeof (sint) );

     if (switch_profiles == FALSE)
        calc_gap_coeff(alignment, gaps, profile1, (struct_penalties1 && use_ss1), gap_penalty_mask1,
           (sint)0, nseqs1, prf_length1, gapcoef1, lencoef1);
     else
        calc_gap_coeff(alignment, gaps, profile1, (struct_penalties2 && use_ss2), gap_penalty_mask2,
           (sint)0, nseqs1, prf_length1, gapcoef1, lencoef1);
/*
  calculate the profile matrix.
*/
     calc_prf1(profile1, alignment, gaps, matrix,
          aln_weight, prf_length1, (sint)0, nseqs1);

if (debug>4)
{
extern char *amino_acid_codes;
  for (j=0;j<=max_aa;j++)
    fprintf(stdout,"%c    ", amino_acid_codes[j]);
 fprintf(stdout,"\n");
  for (i=0;i<prf_length1;i++)
   {
    for (j=0;j<=max_aa;j++)
      fprintf(stdout,"%d ", (pint)profile1[i+1][j]);
    fprintf(stdout,"%d ", (pint)profile1[i+1][gap_pos1]);
    fprintf(stdout,"%d ", (pint)profile1[i+1][gap_pos2]);
    fprintf(stdout,"%d %d\n",(pint)profile1[i+1][GAPCOL],(pint)profile1[i+1][LENCOL]);
   }
}

/*
  calculate the Gap Coefficients.
*/

     if (switch_profiles == FALSE)
        calc_gap_coeff(alignment, gaps, profile2, (struct_penalties2 && use_ss2), gap_penalty_mask2,
           nseqs1, nseqs1+nseqs2, prf_length2, gapcoef2, lencoef2);
     else
        calc_gap_coeff(alignment, gaps, profile2, (struct_penalties1 && use_ss1), gap_penalty_mask1,
           nseqs1, nseqs1+nseqs2, prf_length2, gapcoef2, lencoef2);
/*
  calculate the profile matrix.
*/
     calc_prf2(profile2, alignment, aln_weight,
           prf_length2, nseqs1, nseqs1+nseqs2);

     aln_weight=ckfree((void *)aln_weight);

if (debug>4)
{
extern char *amino_acid_codes;
  for (j=0;j<=max_aa;j++)
    fprintf(stdout,"%c    ", amino_acid_codes[j]);
 fprintf(stdout,"\n");
  for (i=0;i<prf_length2;i++)
   {
    for (j=0;j<=max_aa;j++)
      fprintf(stdout,"%d ", (pint)profile2[i+1][j]);
    fprintf(stdout,"%d ", (pint)profile2[i+1][gap_pos1]);
    fprintf(stdout,"%d ", (pint)profile2[i+1][gap_pos2]);
    fprintf(stdout,"%d %d\n",(pint)profile2[i+1][GAPCOL],(pint)profile2[i+1][LENCOL]);
   }
}

  aln_path1 = (char *) ckalloc( (max_aln_length+1) * sizeof(char) );
  aln_path2 = (char *) ckalloc( (max_aln_length+1) * sizeof(char) );


/*
   align the profiles
*/
/* use Myers and Miller to align two sequences */

  last_print = 0;
  print_ptr = 1;

  sb1 = sb2 = 0;
  se1 = prf_length1;
  se2 = prf_length2;

  HH = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) );
  DD = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) );
  RR = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) );
  SS = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) );
  gS = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) );
  displ = (sint *) ckalloc( (max_aln_length+1) * sizeof (sint) );

  score = pdiff(sb1, sb2, se1-sb1, se2-sb2, profile1[0][GAPCOL], profile1[prf_length1][GAPCOL]);

  HH=ckfree((void *)HH);
  DD=ckfree((void *)DD);
  RR=ckfree((void *)RR);
  SS=ckfree((void *)SS);
  gS=ckfree((void *)gS);

  ptracepath( &alignment_len);
  
  displ=ckfree((void *)displ);

  add_ggaps();

  for (i=0;i<prf_length1+2;i++)
     profile1[i]=ckfree((void *)profile1[i]);
  profile1=ckfree((void *)profile1);

  for (i=0;i<prf_length2+2;i++)
     profile2[i]=ckfree((void *)profile2[i]);
  profile2=ckfree((void *)profile2);

  prf_length1 = alignment_len;

  aln_path1=ckfree((void *)aln_path1);
  aln_path2=ckfree((void *)aln_path2);

  NumSeq = 0;
  for (j=0;j<nseqs;j++)
    {
       if (group[j+1]  == 1)
         {
            seqlen_array[j+1] = prf_length1;
	    realloc_seq(j+1,prf_length1);
            for (i=0;i<prf_length1;i++)
              seq_array[j+1][i+1] = alignment[NumSeq][i];
            NumSeq++;
         }
    }
  for (j=0;j<nseqs;j++)
    {
       if (group[j+1]  == 2)
         {
            seqlen_array[j+1] = prf_length1;
            seq_array[j+1] = (char *)realloc(seq_array[j+1], (prf_length1+2) * sizeof (char));
	    realloc_seq(j+1,prf_length1);
            for (i=0;i<prf_length1;i++)
              seq_array[j+1][i+1] = alignment[NumSeq][i];
            NumSeq++;
         }
    }

  for (i=0;i<nseqs1+nseqs2;i++)
     alignment[i]=ckfree((void *)alignment[i]);
  alignment=ckfree((void *)alignment);

  aln_len=ckfree((void *)aln_len);
  gaps=ckfree((void *)gaps);

  return(score/100);
}

static void add_ggaps(void)
{
   sint j;
   sint i,ix;
   sint len;
   char *ta;

   ta = (char *) ckalloc( (alignment_len+1) * sizeof (char) );

   for (j=0;j<nseqs1;j++)
     {
      ix = 0;
      for (i=0;i<alignment_len;i++)
        {
           if (aln_path1[i] == 2)
              {
                 if (ix < aln_len[j])
                    ta[i] = alignment[j][ix];
                 else 
                    ta[i] = ENDALN;
                 ix++;
              }
           else if (aln_path1[i] == 1)
              {
/*
   insertion in first alignment...
*/
                 ta[i] = gap_pos1;
              }
           else
              {
                 fprintf(stdout,"Error in aln_path\n");
              }
         }
       ta[i] = ENDALN;
       
       len = alignment_len;
       alignment[j] = (char *)realloc(alignment[j], (len+2) * sizeof (char));
       for (i=0;i<len;i++)
         alignment[j][i] = ta[i];
       alignment[j][i] = ENDALN;
       aln_len[j] = len;
      }

   for (j=nseqs1;j<nseqs1+nseqs2;j++)
     {
      ix = 0;
      for (i=0;i<alignment_len;i++)
        {
           if (aln_path2[i] == 2)
              {
                 if (ix < aln_len[j])
                    ta[i] = alignment[j][ix];
                 else 
                    ta[i] = ENDALN;
                 ix++;
              }
           else if (aln_path2[i] == 1)
              {
/*
   insertion in second alignment...
*/
                 ta[i] = gap_pos1;
              }
           else
              {
                 fprintf(stdout,"Error in aln_path\n");
              }
         }
       ta[i] = ENDALN;
       
       len = alignment_len;
       alignment[j] = (char *) realloc(alignment[j], (len+2) * sizeof (char) );
       for (i=0;i<len;i++)
         alignment[j][i] = ta[i];
       alignment[j][i] = ENDALN;
       aln_len[j] = len;
      }
      
   ta=ckfree((void *)ta);

   if (struct_penalties1 != NONE)
       gap_penalty_mask1 = add_ggaps_mask(gap_penalty_mask1,alignment_len,aln_path1,aln_path2);
   if (struct_penalties1 == SECST)
       sec_struct_mask1 = add_ggaps_mask(sec_struct_mask1,alignment_len,aln_path1,aln_path2);
     
   if (struct_penalties2 != NONE)
       gap_penalty_mask2 = add_ggaps_mask(gap_penalty_mask2,alignment_len,aln_path2,aln_path1);
   if (struct_penalties2 == SECST)
       sec_struct_mask2 = add_ggaps_mask(sec_struct_mask2,alignment_len,aln_path2,aln_path1);

if (debug>0)
{
  char c;
  extern char *amino_acid_codes;

   for (i=0;i<nseqs1+nseqs2;i++)
     {
      for (j=0;j<alignment_len;j++)
       {
        if (alignment[i][j] == ENDALN) break;
        else if ((alignment[i][j] == gap_pos1) || (alignment[i][j] == gap_pos2))  c = '-';
        else c = amino_acid_codes[alignment[i][j]];
        fprintf(stdout,"%c", c);
       }
      fprintf(stdout,"\n\n");
     }
}

}                  

static char * add_ggaps_mask(char *mask, int len, char *path1, char *path2)
{
   int i,ix;
   char *ta;

   ta = (char *) ckalloc( (len+1) * sizeof (char) );

       ix = 0;
       if (switch_profiles == FALSE)
        {     
         for (i=0;i<len;i++)
           {
             if (path1[i] == 2)
              {
                ta[i] = mask[ix];
                ix++;
              }
             else if (path1[i] == 1)
                ta[i] = gap_pos1;
           }
        }
       else
        {
         for (i=0;i<len;i++)
          {
            if (path2[i] == 2)
             {
               ta[i] = mask[ix];
               ix++;
             }
            else if (path2[i] == 1)
             ta[i] = gap_pos1;
           }
         }
       mask = (char *)realloc(mask,(len+2) * sizeof (char));
       for (i=0;i<len;i++)
         mask[i] = ta[i];
       mask[i] ='\0';
       

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -