⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 malign.c

📁 生物序列比对程序clustw的源代码
💻 C
📖 第 1 页 / 共 2 页
字号:
   maxid = (sint *)ckalloc( (nseqs+1) * sizeof (sint));   for (i=1;i<=nseqs;i++)     {         maxid[i] = -1;         for (j=1;j<=nseqs;j++)            if (maxid[i] < tmat[i][j]) maxid[i] = tmat[i][j];     }/* clear the memory used for the phylogenetic tree */        if (nseqs >= 2)             clear_tree(NULL);        aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) );        ix = 0;        for (i=1;i<=istart+1;i++)         {           aligned[i] = 1;           ++ix;           output_index[i] = i;         }        for (i=istart+2;i<=nseqs;i++) aligned[i] = 0;/* for each unaligned sequence, find it's closest pair amongst the   aligned sequences.  */    group = (sint *)ckalloc( (nseqs+1) * sizeof (sint));    while (ix < nseqs)      {        if (ix > 0)           {             for (i=1;i<=nseqs;i++) {                if (aligned[i] == 0)                  {                     maxid[i] = -1;                     for (j=1;j<=nseqs;j++)                         if ((maxid[i] < tmat[i][j]) && (aligned[j] != 0))                            maxid[i] = tmat[i][j];                  }              }          }/* find the most closely related sequence to those already aligned */         max = -1;         for (i=1;i<=nseqs;i++)           {             if ((aligned[i] == 0) && (maxid[i] > max))               {                  max = maxid[i];                  iseq = i;               }           }/* align this sequence to the existing alignment */         entries = 0;         for (j=1;j<=nseqs;j++)           if (aligned[j] != 0)              {                 group[j] = 1;                 entries++;              }           else if (iseq==j)              {                 group[j] = 2;                 entries++;              }         aligned[iseq] = 1;/* EITHER....., set sequence weights equal to percent identity with new sequence *//*           for (j=0;j<nseqs;j++)              seq_weight[j] = tmat[j+1][iseq];*//* OR...., multiply sequence weights from tree by percent identity with new sequence */           for (j=0;j<nseqs;j++)              seq_weight[j] = tree_weight[j] * tmat[j+1][iseq];if (debug>1)           for (j=0;j<nseqs;j++) if (group[j+1] == 1)fprintf (stdout,"sequence %d: %d\n", j+1,tree_weight[j]);/*  Normalise the weights, such that the sum of the weights = INT_SCALE_FACTOR*/         sum = 0;         for (j=0;j<nseqs;j++)           if (group[j+1] == 1) sum += seq_weight[j];         if (sum == 0)          {           for (j=0;j<nseqs;j++)                seq_weight[j] = 1;                sum = j;          }         for (j=0;j<nseqs;j++)             {               seq_weight[j] = (seq_weight[j] * INT_SCALE_FACTOR) / sum;               if (seq_weight[j] < 1) seq_weight[j] = 1;             }if (debug > 1) {  fprintf(stdout,"new weights\n");  for (j=0;j<nseqs;j++) if (group[j+1] == 1)fprintf( stdout,"sequence %d: %d\n", j+1,seq_weight[j]);}         score = prfalign(group, aligned);         info("Sequence:%d     Score:%d",(pint)iseq,(pint)score);         if (output_order == INPUT)          {            ++ix;            output_index[iseq] = iseq;          }         else            output_index[++ix] = iseq;      }   group=ckfree((void *)group);   aligned=ckfree((void *)aligned);   maxid=ckfree((void *)maxid);   aln_score();/* make the rest (output stuff) into routine clustal_out in file amenu.c */   return(nseqs);}sint palign1(void)  /* a profile alignment */{   sint 		i,j,temp;   sint 		entries;   sint 		*aligned, *group;   float        dscore;   lint			score;   info("Start of Initial Alignment");/* calculate sequence weights according to branch lengths of the tree -   weights in global variable seq_weight normalised to sum to INT_SCALE_FACTOR */   temp = INT_SCALE_FACTOR/nseqs;   for (i=0;i<nseqs;i++) seq_weight[i] = temp;   distance_tree = FALSE;/* do the initial alignment.........  */   group = (sint *)ckalloc( (nseqs+1) * sizeof (sint));   for(i=1; i<=profile1_nseqs; ++i)         group[i] = 1;   for(i=profile1_nseqs+1; i<=nseqs; ++i)         group[i] = 2;   entries = nseqs;   aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) );   for (i=1;i<=nseqs;i++) aligned[i] = 1;   score = prfalign(group, aligned);   info("Sequences:%d      Score:%d",(pint)entries,(pint)score);   group=ckfree((void *)group);   aligned=ckfree((void *)aligned);   for (i=1;i<=nseqs;i++) {     for (j=i+1;j<=nseqs;j++) {       dscore = countid(i,j);       tmat[i][j] = ((double)100.0 - (double)dscore)/(double)100.0;       tmat[j][i] = tmat[i][j];     }   }   return(nseqs);}float countid(sint s1, sint s2){   char c1,c2;   sint i;   sint count,total;   float score;   count = total = 0;   for (i=1;i<=seqlen_array[s1] && i<=seqlen_array[s2];i++) {     c1 = seq_array[s1][i];     c2 = seq_array[s2][i];     if ((c1>=0) && (c1<max_aa)) {       total++;       if (c1 == c2) count++;     }   }   if(total==0) score=0;   else   score = 100.0 * (float)count / (float)total;   return(score);}sint palign2(char *p1_tree_name,char *p2_tree_name)  /* a profile alignment */{   sint 	i,j,sum,entries,status;   lint 		score;   sint 	*aligned, *group;   sint		*maxid,*p1_weight,*p2_weight;   sint dscore;   info("Start of Multiple Alignment");/* get the phylogenetic trees from *.ph */   if (profile1_nseqs >= 2)     {        status = read_tree(p1_tree_name, (sint)0, profile1_nseqs);        if (status == 0) return(0);     }/* calculate sequence weights according to branch lengths of the tree -   weights in global variable seq_weight normalised to sum to 100 */   p1_weight = (sint *) ckalloc( (profile1_nseqs) * sizeof(sint) );   calc_seq_weights((sint)0, profile1_nseqs, p1_weight);/* clear the memory for the phylogenetic tree */   if (profile1_nseqs >= 2)        clear_tree(NULL);   if (nseqs-profile1_nseqs >= 2)     {        status = read_tree(p2_tree_name, profile1_nseqs, nseqs);        if (status == 0) return(0);     }   p2_weight = (sint *) ckalloc( (nseqs) * sizeof(sint) );   calc_seq_weights(profile1_nseqs,nseqs, p2_weight);/* clear the memory for the phylogenetic tree */   if (nseqs-profile1_nseqs >= 2)        clear_tree(NULL);/* convert tmat distances to similarities */   for (i=1;i<nseqs;i++)        for (j=i+1;j<=nseqs;j++) {            tmat[i][j]=100.0-tmat[i][j]*100.0;            tmat[j][i]=tmat[i][j];        }     /* weight sequences with max percent identity with other profile*/   maxid = (sint *)ckalloc( (nseqs+1) * sizeof (sint));   for (i=0;i<profile1_nseqs;i++) {         maxid[i] = 0;         for (j=profile1_nseqs+1;j<=nseqs;j++)                       if(maxid[i]<tmat[i+1][j]) maxid[i] = tmat[i+1][j];         seq_weight[i] = maxid[i]*p1_weight[i];   }   for (i=profile1_nseqs;i<nseqs;i++) {         maxid[i] = -1;         for (j=1;j<=profile1_nseqs;j++)                      if(maxid[i]<tmat[i+1][j]) maxid[i] = tmat[i+1][j];         seq_weight[i] = maxid[i]*p2_weight[i];   }/*  Normalise the weights, such that the sum of the weights = INT_SCALE_FACTOR*/         sum = 0;         for (j=0;j<nseqs;j++)            sum += seq_weight[j];         if (sum == 0)          {           for (j=0;j<nseqs;j++)                seq_weight[j] = 1;                sum = j;          }         for (j=0;j<nseqs;j++)             {               seq_weight[j] = (seq_weight[j] * INT_SCALE_FACTOR) / sum;               if (seq_weight[j] < 1) seq_weight[j] = 1;             }if (debug > 1) {  fprintf(stdout,"new weights\n");  for (j=0;j<nseqs;j++) fprintf( stdout,"sequence %d: %d\n", j+1,seq_weight[j]);}/* do the alignment.........  */   info("Aligning...");   group = (sint *)ckalloc( (nseqs+1) * sizeof (sint));   for(i=1; i<=profile1_nseqs; ++i)         group[i] = 1;   for(i=profile1_nseqs+1; i<=nseqs; ++i)         group[i] = 2;   entries = nseqs;   aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) );   for (i=1;i<=nseqs;i++) aligned[i] = 1;   score = prfalign(group, aligned);   info("Sequences:%d      Score:%d",(pint)entries,(pint)score);   group=ckfree((void *)group);   p1_weight=ckfree((void *)p1_weight);   p2_weight=ckfree((void *)p2_weight);   aligned=ckfree((void *)aligned);   maxid=ckfree((void *)maxid);/* DES   output_index = (int *)ckalloc( (nseqs+1) * sizeof (int)); */   for (i=1;i<=nseqs;i++) output_index[i] = i;   return(nseqs);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -