⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 readtree.c

📁 是有关基因比对的经典算法的实现。这对于初学计算生物学的人是非常重要的算法。
💻 C
📖 第 1 页 / 共 2 页
字号:

/*
  insert a new node as the ancestor of the node which produces the shallowest
  tree.
*/
   if (rootptr == ptree)
     {
        mindiff = rootptr->left->dist + rootptr->right->dist;
        rootptr = rootptr->right;
     }
   rootnode = insert_root(rootptr, mindiff);
  
   diff = calc_root_mean(rootnode, &maxdist,lptr);

   return(rootnode);
}

static INODEPTR insert_root(INODEPTR p, float diff)
{
   INODEPTR newp, prev, q, t;
   float dist, prevdist,td;

   newp = avail();

   t = p->parent;
   prevdist = t->dist;

   p->parent = newp;

   dist = p->dist;

   p->dist = diff / 2;
   if (p->dist < 0.0) p->dist = 0.0;
   if (p->dist > dist) p->dist = dist;

   t->dist = dist - p->dist; 

   newp->left = t;
   newp->right = p;
   newp->parent = NULL;
   newp->dist = 0.0;
   newp->leaf = NODE;

   if (t->left == p) t->left = t->parent;
   else t->right = t->parent;

   prev = t;
   q = t->parent;

   t->parent = newp;

   while (q != NULL)
     {
        if (q->left == prev)
           {
              q->left = q->parent;
              q->parent = prev;
              td = q->dist;
              q->dist = prevdist;
              prevdist = td;
              prev = q;
              q = q->left;
           }
        else
           {
              q->right = q->parent;
              q->parent = prev;
              td = q->dist;
              q->dist = prevdist;
              prevdist = td;
              prev = q;
              q = q->right;
           }
    }

/*
   remove the old root node
*/
   q = prev;
   if (q->left == NULL)
      {
         dist = q->dist;
         q = q->right;
         q->dist += dist;
         q->parent = prev->parent;
         if (prev->parent->left == prev)
            prev->parent->left = q;
         else
            prev->parent->right = q;
         prev->right = NULL;
      }
   else
      {
         dist = q->dist;
         q = q->left;
         q->dist += dist;
         q->parent = prev->parent;
         if (prev->parent->left == prev)
            prev->parent->left = q;
         else
            prev->parent->right = q;
         prev->left = NULL;
      }

   return(newp);
}

static float calc_root_mean(INODEPTR root, float *maxdist,INODEPTR *lptr)
{
   float dist , lsum = 0.0, rsum = 0.0, lmean,rmean,diff;
   INODEPTR p;
   sint i;
   sint nl, nr;
   sint direction;
/*
   for each leaf, determine whether the leaf is left or right of the root.
*/
   dist = (*maxdist) = 0;
   nl = nr = 0;
   for (i=0; (p=lptr[i])!=NULL; i++)
     {
         dist = 0.0;
         while (p->parent != root)
           {
               dist += p->dist;
               p = p->parent;
           }
         if (p == root->left) direction = LEFT;
         else direction = RIGHT;
         dist += p->dist;

         if (direction == LEFT)
           {
             lsum += dist;
             nl++;
           }
         else
           {
             rsum += dist;
             nr++;
           }
        if (dist > (*maxdist)) *maxdist = dist;
     }

   lmean = lsum / nl;
   rmean = rsum / nr;

   diff = lmean - rmean;
   return(diff);
}


static float calc_mean(INODEPTR nptr, float *maxdist, sint nseqs,INODEPTR *lptr)
{
   float dist , lsum = 0.0, rsum = 0.0, lmean,rmean,diff;
   INODEPTR p, *path2root;
   float *dist2node;
   sint depth = 0, i, n = 0;
   sint nl , nr;
   sint direction;

	path2root = (INODEPTR *)ckalloc(nseqs * sizeof(INODEPTR));
	dist2node = (float *)ckalloc(nseqs * sizeof(float));
   nl = nr = 0;
   (*maxdist) = 0;
/*
   determine all nodes between the selected node and the root;
*/
   get_path2root(nptr,path2root,dist2node,&depth);
 
/*
   for each leaf, determine whether the leaf is left or right of the node.
   (where RIGHT = descendant, LEFT = not descendant)
*/
   for (i=0; (p=lptr[i])!=NULL; i++)
     {
       if (p == nptr)
         {
            direction = RIGHT;
            dist = 0.0;
         }
       else
         {
            direction = LEFT;
/*
   find the common ancestor.
*/
	    n=common_ancestor(p,path2root,depth,&dist);
            if (path2root[n] == nptr) direction = RIGHT;
         }

         if (direction == LEFT)
           {
             lsum += dist;
             lsum += dist2node[n-1];
             nl++;
           }
         else
           {
             rsum += dist;
             nr++;
           }

        if (dist > (*maxdist)) *maxdist = dist;
     }

	dist2node=ckfree((void *)dist2node);
	path2root=ckfree((void *)path2root);
	
   lmean = lsum / nl;
   rmean = rsum / nr;
   
   diff = lmean - rmean;
   return(diff);
}

void get_path2root(INODEPTR nptr,INODEPTR *path2root,float *dist2node,sint *depth)
{
	float dist;
	INODEPTR p;

	*depth = dist = 0;
	p = nptr;
	while (p != NULL) {
		path2root[*depth] = p;
		dist += p->dist;
		dist2node[*depth] = dist;
		p = p->parent;
		(*depth)++;
	}
}

/*
   find the intersection of the path from node p to the root, and the path in path2root
   depth = size of path2root
   dist = dist from the top of path2root and the intersection
*/
sint common_ancestor(INODEPTR p,INODEPTR *path2root,sint depth,float *dist)
{
	INODEPTR ancestor;
	Boolean found;
	sint n,j;

        found = FALSE;
        n = 0;
	ancestor=p;
        (*dist) = 0.0;
        while ((found == FALSE) && (ancestor->parent != NULL)) {
               for (j=0; j< depth; j++)
                 if (ancestor->parent == path2root[j]) { 
                      found = TRUE;
                      n = j;
                 }
               (*dist) += ancestor->dist;
               ancestor = ancestor->parent;
        }

	return n;
}

static void order_nodes(INODEPTR *lptr)
{
   sint i;
   INODEPTR p;

   for (i=0; (p=lptr[i])!=NULL; i++)
     {
        while (p != NULL)
          {
             p->order++;
             p = p->parent;
          }
     }
}

/*
   Calculates a matrix of pairwise sequence similarities from a phylogenetic tree
	seqs	the sequences
	nseqs	number of sequences
	itree	the phylogenetic tree
   returns the similarity matrix
*/
double ** calc_similarities(SEQ *seqs,sint nseqs,IN_TREEPTR itree)
{
   sint depth = 0, i,j, k, n;
   sint found;
   sint nerrs, *seq1,*seq2;
   INODEPTR p, *path2root;
   float dist;
   float *dist2node, *bad_dist;
   double **tmat;
   char err_mess[1024],err1[MAXLINE],reply[MAXLINE];

	tmat = (double **) ckalloc( (nseqs+1) * sizeof (double *) );
        for(i=0;i<nseqs;i++)
                tmat[i] = (double *)ckalloc( (nseqs+1) * sizeof (double) );
        for(i=0;i<nseqs;i++)
                for(j=0;j<nseqs;j++)
                        tmat[i][j]=0.0;

   path2root = (INODEPTR *)ckalloc((nseqs) * sizeof(INODEPTR));
   dist2node = (float *)ckalloc((nseqs) * sizeof(float));
   seq1 = (sint *)ckalloc((nseqs) * sizeof(sint));
   seq2 = (sint *)ckalloc((nseqs) * sizeof(sint));
   bad_dist = (float *)ckalloc((nseqs) * sizeof(float));

   if (nseqs < 2) return(NULL);
/*
   for each leaf, determine all nodes between the leaf and the root;
*/
      for (i = 0;i<nseqs; i++)
       { 
          depth = dist = 0;
          p = itree->leafptr[i];
          while (p != NULL)
            {
                path2root[depth] = p;
                dist += p->dist;
                dist2node[depth] = dist;
                p = p->parent;
                depth++;
            }
 
/*
   for each pair....
*/
          for (j=0; j < i; j++)
            {
              p = itree->leafptr[j];
              dist = 0.0;
/*
   find the common ancestor.
*/
              found = FALSE;
              n = 0;
              while ((found == FALSE) && (p->parent != NULL))
                {
                    for (k=0; k< depth; k++)
                      if (p->parent == path2root[k])
                         { 
                           found = TRUE;
                           n = k;
                         }
                    dist += p->dist;
                    p = p->parent;
                }
   
              tmat[i][j] = dist + dist2node[n-1];
            }
        }

		nerrs = 0;
        for (i=0;i<nseqs;i++)
          {
             tmat[i][i] = 0.0;
             for (j=0;j<i;j++)
               {
                  if (tmat[i][j] < 0.01) tmat[i][j] = 0.01;
                  if (tmat[i][j] > 1.0) {
                  	if (tmat[i][j] > 1.1) {
                  		seq1[nerrs] = i;
                  		seq2[nerrs] = j;
                  		bad_dist[nerrs] = tmat[i][j];
                  		nerrs++;
                  	}
                    tmat[i][j] = 1.0;
                  }
               }
          }
        if (nerrs>0) 
          {
             strcpy(err_mess,"The following sequences are too divergent to be aligned:\n");
             for (i=0;i<nerrs && i<5;i++)
              {
             	sprintf(err1,"           %s and %s (distance %1.3f)\n",
             	                        seqs[seq1[i]+1].name,
					seqs[seq2[i]+1].name,bad_dist[i]);
             	strcat(err_mess,err1);
              }
	     strcat(err_mess,"(All distances should be between 0.0 and 1.0)\n");
	     strcat(err_mess,"This may not be fatal but you have been warned!\n");
             strcat(err_mess,"SUGGESTION: Remove one or more problem sequences and try again");
             (*reply)=prompt_for_yes_no(err_mess,"Continue ");
             if ((*reply != 'y') && (*reply != 'Y'))
                    return(NULL);
          }

   path2root=ckfree((void *)path2root);
   dist2node=ckfree((void *)dist2node);
   seq1=ckfree((void *)seq1);
   seq2=ckfree((void *)seq2);
   bad_dist=ckfree((void *)bad_dist);

   for (i=0;i<nseqs;i++)
     {
        tmat[i][i] = 0.0;
        for (j=0;j<i;j++)
          {
             tmat[i][j] = 100.0 - (tmat[i][j]) * 100.0;
             tmat[j][i] = tmat[i][j];
          }
     }
   return(tmat);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -