⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mast-util.c

📁 EM算法的改进
💻 C
📖 第 1 页 / 共 2 页
字号:
/* * $Id: mast-util.c 1339 2006-09-21 19:46:28Z tbailey $ *  * $Log$ * Revision 1.3  2006/03/08 20:50:11  nadya * merge chamges from v3_5_2 branch * * Revision 1.2.4.1  2006/01/24 20:44:08  nadya * update copyright * * Revision 1.2  2005/10/20 00:20:52  tbailey * *** empty log message *** * * Revision 1.1.1.1  2005/07/29 17:17:18  nadya * Importing from meme-3.0.14, and adding configure/make * *//*************************************************************************                                                                     	**       MAST                                                           	**       Author: Timothy L. Bailey                                      	**                                                                       **	Copyright							**	(1994 - 2006) The Regents of the University of California.	**	All Rights Reserved.						**									**	Permission to use, copy, modify, and distribute any part of 	**	this software for educational, research and non-profit purposes,**	without fee, and without a written agreement is hereby granted, **	provided that the above copyright notice, this paragraph and 	**	the following three paragraphs appear in all copies.		**									**	Those desiring to incorporate this software into commercial 	**	products or use for commercial purposes should contact the 	**	Technology Transfer Office, University of California, San Diego,**	9500 Gilman Drive, La Jolla, California, 92093-0910, 		**	Ph: (858) 534 5815.						**									**	IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO 	**	ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR 	**	CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF 	**	THE USE OF THIS SOFTWARE, EVEN IF THE UNIVERSITY OF CALIFORNIA 	**	HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 		**									**	THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE **	UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO PROVIDE 		**	MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.  **	THE UNIVERSITY OF CALIFORNIA MAKES NO REPRESENTATIONS AND 	**	EXTENDS NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, **	INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 	**	MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT 	**	THE USE OF THE MATERIAL WILL NOT INFRINGE ANY PATENT, 		**	TRADEMARK OR OTHER RIGHTS.  					*************************************************************************/#include <mast.h>#define BCHUNK 1000		/* maximum size of a block diagram entry */#define BLIMIT 100		/* min. remainder before adding to block array *//*	Local Declarations*/static SCORE **score_it(  BOOLEAN xlate_dna,	/* database is DNA and motifs protein */  LO *los[], 		/* array of pointers to log-odds matrices */  int nmotifs, 		/* number of motifs */  char *sequence,	/* sequence */  long length 		/* length of the sequence */);static BOOLEAN best_hit(  int index, 		/* Position of the given motif. */  int motif,		/* Motif number. */  int *hits,		/* Motif indices. */  double *pvalues,	/* Array of pvalues. */  long length		/* Length of pvalue array. */);static TILING tile_sequence(  STYPE stype,				/* treatment of strands of DNA */  double **pv,				/* p-value tables for each motif */  double thresh,	 		/* mark hits with p-value < thresh */  BOOLEAN use_seq_p,			/* use sequence not position p-values */  LO *los[],				/* array of pointers to lo matrices */  int nmotifs,				/* number motifs read */   long length,				/* length of sample */  SCORE **scores 			/* scores for each motif vs seq. pos. */);/*	Procedures*//**********************************************************************//*	score_sequence	Compute the scores for each motif and a given sequence.        Returns *scores[] : scores[i][j].score = score of motif i at position j        Returns *scores[] : scores[i][j].ic = score on - strand*//**********************************************************************/extern SCORE **score_sequence(  STYPE stype,		/* how to treat different strands of DNA */  BOOLEAN xlate_dna,	/* database is DNA and motifs protein */  char *sequence,	/* the sequence to score */  long length,		/* length of the sequence */  int nmotifs, 		/* number of motifs */  LO *los[]   		/* array of pointers to log-odds matrices */){  long i, j, k;  SCORE **scores;  	/* scores[i][j].score = motif i at position j */  /*     Score the sequence with each motif on positive strand  */  scores = score_it(xlate_dna, los, nmotifs, sequence, length);  /*     Hash reverse complement sequence and score it;     combine scores saving the MAX score for both strands at each position  */  if (stype == Combine) {		/* combine scores from both strands */    char *icseq = NULL;			/* reverse complement sequence */    SCORE **icscores;  			/* scores on - strand */    Resize(icseq, length+1, char);    for (i=0,j=length-1;i<length; i++, j--) icseq[j] = comp_dna(sequence[i]);    icseq[i] = '\0';    icscores = score_it(xlate_dna, los, nmotifs, icseq, length);     /* combine scores */    for (i=0; i<nmotifs; i++) {			/* motif */      int w = los[i]->ws;      long last_j = length-w;			/* last possible site */      if (!icscores[i]) continue;		/* skip if no scores */      for (j=0,k=last_j; j<=last_j; j++, k--) {	/* site */        if (icscores[i][k].score > scores[i][j].score) {          scores[i][j].score = icscores[i][k].score;          scores[i][j].ic = TRUE;        }       } /* site */    } /* motif */    myfree(icseq);    free_2array(icscores, nmotifs);  }  return scores;} /* score_sequence *//**********************************************************************//*	tile_sequence	The sequence is tiled with motif occurrences with p-values > thresh      	such that:	  1) occurrences do not overlap	  2) smaller numbered motif occurrences place first	  3) new occurrence replaces old occurrences it overlaps if its p-value             is less than the product of their p-values	Returns a tiling structure containing		hits =		  +motif number, 	hit on + strand		  -motif number, 	hit on reverse complement, 		  0, 			no hit		pvalues			position p-value of hit		pv			p-value of the product of p-values*//**********************************************************************/static TILING tile_sequence(  STYPE stype,				/* treatment of strands of DNA */  double **pv,				/* p-value tables for each motif */  double thresh,	 		/* mark hits with p-value < thresh */  BOOLEAN use_seq_p,			/* use sequence not position p-values */  LO *los[],				/* array of pointers to lo matrices */  int nmotifs,				/* number motifs read */   long length,				/* length of sample */  SCORE **scores 			/* scores for each motif vs seq. pos. */){  long i, j, k;  int imotif;  int maxws = 0;                     	/* maximum width in seq */  TILING tiling;		 	/* tiling record */  int *hits = NULL;			/* non-overlapping hits */  double *pvalues = NULL;		/* hit p-values */  int nstrands = (stype == Combine) ? 2 : 1;	/* number of strands for score*/  double prod_of_p;			/* product of sequence p-values */  int smotifs;				/* number of motifs scored */  /*     create storage for hits and pvalues   */  Resize(hits, length, int);  Resize(pvalues, length, double);  /*    clear array of hits  */  for (i=0; i<length; i++) hits[i] = 0;  /*    mark non-overlapping hits for all motifs  */  prod_of_p = 1;  for (imotif=smotifs=0; imotif<nmotifs; imotif++) {	/* motif */    LO *lo = los[imotif];		/* logodds matrix of motif */    int ws = lo->ws;			/* width of current motif in sequence */    double pvalue;			/* position p-value of score */    double seq_pvalue;			/* sequence p-value of score */    double best_score = LITTLE;		/* best score for motif */    long n = nstrands*(length - ws + 1);	/* possible positions for score */    if (!scores[imotif]) continue;	/* skip motif if too long */    smotifs++;				/* motif was scored */    maxws = MAX(ws, maxws);		/* maximum motif width in seq so far */    for (j=0; j <= length - ws; j++) {	/* site start in sequence */      int score = scores[imotif][j].score;	/* score of site */      int ic = scores[imotif][j].ic;	/* site on reverse strand */      /* update best score for motif */      if (score > best_score) best_score = score;      /* get position p-value of score */      pvalue = pv[imotif][(int) score];      /* get sequence p-value of score if using instead of position p-value */      if (use_seq_p) { EV(pvalue, n, seq_pvalue); pvalue = seq_pvalue; }      /* 	create list of non-overlapping motifs for this motif      */      if (pvalue < thresh) {	BOOLEAN ok_to_mark = TRUE;	/* make motif if true */	double prod = 1;		/* product of overlapped p-values */	long first = MAX(0,j-maxws+1);	/* last overlap from left */	long last = MIN(j+ws, length);	/* past last overlap to right */	/*           get product of p-values motifs this would overlap to right         */	for (k=j; k<last && ok_to_mark; k++) {	  if (hits[k] != 0) {		/* motif already here? */	    prod *= pvalues[k];	    if (pvalue >= prod) ok_to_mark = FALSE;	  }	}	/*           get product of p-values motifs this would overlap from left         */	for (k=first; k<j && ok_to_mark; k++) {	  int m = abs(hits[k])-1;	  if (m >= 0 && los[m]->ws > j-k) {/* motif already here? */	    prod *=  pvalues[k];	    if (pvalue >= prod) ok_to_mark = FALSE;	  }	}	/*           mark motif if ok         */	if (ok_to_mark) {	  hits[j] = ic ? -(imotif+1) : imotif+1;	  pvalues[j] = pvalue;	  /* remove overlapped motifs on right */	  for (k=j+1; k<last; k++) {	    hits[k] = 0;	    pvalues[k] = 1;	  }	  /*             remove overlapped motifs on left           */	  for (k=first; k<j; k++) {	    int m = abs(hits[k])-1;	    if (m >= 0 && los[m]->ws > j-k) {	      hits[k] = 0;	      pvalues[k] = 1;	    }	  }	} /* mark motif */      } /* pvalue < thresh */    } /* site start */    /* get sequence p-value of best score and take product of p-values */    pvalue = pv[imotif][(int) best_score];    EV(pvalue, n, seq_pvalue);    prod_of_p *= seq_pvalue;  } /* imotif */  /*     return the sequence tiling  */  tiling.hits = hits;  tiling.pvalues = pvalues;  tiling.pv = qfast(smotifs, prod_of_p);  return(tiling);} /* tile_sequence *//**********************************************************************//*	create_diagram	Create a block diagram of the motifs in the sequence.	Returns a block diagram showing the order and spacing of the hits	where hits are either		strong 		p-value < thresh 		weak		otherwise.*//**********************************************************************/extern char *create_diagram(  BOOLEAN dna,				/* database is DNA */  STYPE stype,				/* treatment of strands of DNA */  BOOLEAN xlate_dna,			/* database is DNA and motifs protein */  BOOLEAN best_motifs, 			/* diagrams have only best motif */  BOOLEAN print_p,			/* print p-value in block */  double thresh,			/* strong hit threshold */  int nmotifs,				/* number of motifs */  LO *los[],				/* array of pointers to lo matrices */  long length,				/* length of sample */  BOOLEAN hit_list,			/* create hit list instead of diagram */  char *name,				/* name of sequence; only used if hit_list=TRUE */  TILING tiling				/* tiling of sequence */){  long i;  int c = 0;					/* current position */  int spacer = 0;				/* current spacer */  char tmp[100];				/* scratch space */  char *blocks=NULL; 				/* motif diagram */  int bsize = 0;				/* current size of blocks array */  int *hits = tiling.hits;			/* non-overlapping hits */  double *pvalues = tiling.pvalues;		/* hit p-values */  /* create the diagram */  for (i=0; i < length; i++) {    int m = abs(hits[i])-1;			/* motif of hit */    BOOLEAN ic = hits[i] < 0;			/* hit on reverse strand */    char *strand = stype==Combine ? (ic ? "-" : "+") : "";     int frame = xlate_dna ? i%3 + 1 : 0;	/* frame if translating DNA */    if ((m >= 0) &&				/* start of new motif */      (!best_motifs || best_hit(i, m, hits, pvalues, length)) ) {      if (spacer > 0) {                    	/* spacer ending */        if (bsize-c < BLIMIT) Resize(blocks, bsize+=BCHUNK, char);        if (!hit_list) c += sprintf(blocks+c, "%d_", spacer);	/* spacer */      }      if (hit_list) {        if (bsize-c < BLIMIT) Resize(blocks, bsize+=BCHUNK, char);        /* c += sprintf(blocks+c, " %s%d %ld %ld %8.2e,", */        c += sprintf(blocks+c, "%s %s%d %ld %ld %8.2e\n",           name, strand, los[m]->name, i+1, i+los[m]->ws, pvalues[i]);      } else {	make_block(los[m]->name, strand, frame, thresh, pvalues[i], print_p, tmp);        if (bsize-c < BLIMIT) Resize(blocks, bsize+=BCHUNK, char);	c += sprintf(blocks+c, "%s_", tmp);	/* block */      }      spacer = -los[m]->ws + 1;			/* account for width */    } else {      spacer++;					/* increase spacer */    }  }  if (hit_list) {    /* nothing to do */  } else if (spacer > 0) {    if (bsize-c < BLIMIT) Resize(blocks, bsize+=BCHUNK, char);    sprintf(blocks+c, "%d", spacer);        /* final spacer */  } else if (c>0) {    blocks[c-1] = '\0';                     /* remove final dash or comma */  } else {    Resize(blocks, 1, char);    blocks[0] = '\0';  }  return blocks;} /* create_diagram *//**********************************************************************//*	print_diagram	Print the motif block diagram.*//**********************************************************************/extern void print_diagram(  char *dia,				/* motif diagram string */  char *hdr,				/* prefix for each line of diagram */  FILE *file				/* destination file */){  int j;  int dia_len = strlen(dia);			/* length of diagram */  int hlen = strlen(hdr);			/* length of header */  for (j=0; j < dia_len; ) {    int remain = dia_len - j;			/* left to print */    int dlen;					/* room on line */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -