⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 display.c

📁 EM算法的改进
💻 C
📖 第 1 页 / 共 4 页
字号:
/* * $Id: display.c 1339 2006-09-21 19:46:28Z tbailey $ *  * $Log$ * Revision 1.4  2006/03/08 20:50:11  nadya * merge chamges from v3_5_2 branch * * Revision 1.3.4.1  2006/01/24 20:44:08  nadya * update copyright * * Revision 1.3  2005/10/25 19:00:37  nadya * change c++ style comment to proper c * * Revision 1.2  2005/10/20 00:20:52  tbailey * *** empty log message *** * * Revision 1.1.1.1  2005/07/29 23:35:53  nadya * Importing from meme-3.0.14, and adding configure/make * *//************************************************************************								       **	MEME							       **	Copyright 1994-2006, The Regents of the University of California **	Author: Timothy L. Bailey				       **								       ************************************************************************//**********************************************************************//*	Display routines for the results of MEME*//**********************************************************************//* 7-23-99 tlb; replace nsites() calls with model->nsites *//* 7-16-99 tlb; move SITE to meme.h, get_sites to meme_util.c *//* 7-14-99 tlb; change simplified prob. matrix to be observed frequencies,  consensus to be based on observed frequencies *//* 4-7-99 tlb; fix bug in get_sites: setting of best site in oops and zoops */#include <meme.h>#include <mast.h>#include <string.h>static char *yesno[] = {"no", "yes"};static char *stars = NULL;/* number of different integral score values for computing p-values */#define RANGE 100/* stepsize and size of smoothing window for get_q */#define NSTEPS 100/*#define WINDOW (NSTEPS/10)*/#define WINDOW 0/* round double to integer; round up if half way between *//* only works for positive numbers */#ifndef NINT  #define NINT(x) ((int) ((x)+0.5))#endif/* maximum number of levels in consensus sequence */#define MAXDEPTH ((int) (1.0/MINCONS))/* encode and decode a (seq #, offset) pair as one integer *//* this will bomb if seq. length > MAX_SEQ_LEN or   if number of sequences * MAX_SEQ_LEN exceeds size of int */#define MAX_SEQ_LEN 10000#define ENCODE(I,J) (I) * MAX_SEQ_LEN + (J);#define DECODE(N, SEQ, OFF) {\  (OFF) = (N) % MAX_SEQ_LEN;\  (SEQ) = ((N) - (OFF)) / MAX_SEQ_LEN;\}/* distance to indent start of IC histogram, consensus and simplified motif */#define IND 13#define IND2 6/* record describing accuracy of motif */typedef struct {  double thresh;	/* optimal threshold */  int err;		/* classification errors using thresh */  double roc;		/* ROC */} ACCURACY;/* sortable sequence score record */typedef struct {  double score;		/* sequence score */  int class;		/* class of sequence,  1=pos, 0=neg */  char *id;} SORTED_SCORE;/* local functions */static void print_sites(  DATASET *dataset,			/* the dataset */  MODEL *model,				/* the model */  int format,				/* 0=BLOCKS; 1=FASTA */  char *com,				/* comment to append */  FILE *outfile				/* where to print */);static void print_log_odds(  int imotif,			/* motif number */   DATASET *dataset,		/* the dataset */  int w,			/* width of motif */  BOOLEAN pair,			/* double matrix if true */  LOGODDS logodds,		/* log-odds matrix */  double bayes,  		/* threshold */  double log_ev			/* log E-value of motif */);static void print_entropy(  MODEL *model,			/* the model */  DATASET *dataset,		/* the dataset */  char *str_space,		/* space for printing strand direction */  FILE *outfile			/* stream for output */);static void print_candidates(  CANDIDATE *candidates, 	/* list of candidate models */  DATASET *dataset,		/* the dataset */  int max_w			/* maximum width for motifs */);static ACCURACY *get_thresh(  int w,				/* width of motif */  LOGODDS logodds1,			/* log-odds matrix: LOG2(m_ij/b_j) */  LOGODDS logodds2,			/* log-odds matrix: LOG2(m_ij/a_ij) */  DATASET *pos,				/* positive examples */  DATASET *neg,				/* negative examples */  BOOLEAN print_scores			/* print sorted scores */);static double meme_score_sequence(  char *eseq,		/* integer-coded sequence to score */  int length,		/* length of the sequence */  int w, 		/* width of motif */  LOGODDS logodds1, 	/* log-odds matrix: LOG2(m_ij/b_j) */  LOGODDS logodds2  	/* log-odds matrix: LOG2(m_ij/n_ij) */);static int s_compare(  const void *v1,  const void *v2);static double get_q(  int nsteps,					/* try nsteps from 0 to 1 */  int window,					/* smoothing window radius */  int w,					/* width of motif */  THETA theta,					/* motif theta */  THETA neg_theta, 				/* anti-motif theta */  double *back,					/* background motif */  DATASET *dataset,				/* the dataset */  DATASET *neg_dataset,				/* negative examples */  char *str_space		/* space for printing strand direction */);static LO *create_lo(  int name,				/* name of motif */  int w, 				/* width of motif */  int alen,				/* length of alphabet */  LOGODDS logodds,			/* single-letter logodds matrix */  BOOLEAN pair,  double threshold			/* Bayes optimal threshold */);static void score_sites(  DATASET *dataset,			/* the dataset */  MODEL *model,				/* the model */  LO *lo,				/* LO structure */  double *pv 				/* p-values for scores of this motif */);static void print_site_diagrams(  DATASET *dataset,			/* the dataset */  MODEL *model,				/* the model */  int nmotifs,				/* number of motifs in los */  LO *los[MAXG],			/* logodds structure for motif */  FILE *outfile				/* where to print */);static void align_sites(  DATASET *dataset,			/* the dataset */  MODEL *model,				/* the model */  LO *lo,				/* LO structure */  double *pv,				/* pvalues for scores of this motif */  FILE *outfile				/* stream for output */);static void print_block_diagrams(  MODEL *model,				/* the model */  DATASET *dataset,			/* the dataset */  LO *los[MAXG],			/* logodds structures for motifs */  int nmotifs, 				/* number of motifs */  double *pv[MAXG],			/* p-value tables for each motif */  FILE *outfile);static char *get_regexp(  MODEL *model,			/* the model */  DATASET *dataset,		/* the dataset */  int prosite                   /* prosite ==1 [AK]-E-[EG]. ==0 [AK]E[EG] */);/**********************************************************************//*	Print the results of EM*//**********************************************************************/extern void print_results(  DATASET *dataset,		/* the dataset */  DATASET *neg_dataset,		/* negative examples */  MODEL *model,			/* the model */  MODEL *neg_model, 		/* negative model */  CANDIDATE *candidates 	/* candidate models found */){  int i;  int max_w = model->max_w;			/* maximum width tried */  int nstrands = model->invcomp ? 2 : 1;	/* # of strands to use */  int w = model->w;				/* width of last component */  int nsites_dis = model->nsites_dis;	 	/* # of sites after discretiz.*/  double m1, e1, m2, e2;			/* for printing significance */  char *str_space = (nstrands == 1) ? "" : "       ";  BOOLEAN pair = neg_dataset ? neg_dataset->negtype == Pair : FALSE;  BOOLEAN blend = neg_dataset ? neg_dataset->negtype == Blend : FALSE;  THETA theta = model->theta;  THETA obs = model->obs;  THETA neg_theta = neg_model ? neg_model->theta : NULL;  THETA neg_obs = neg_model ? neg_model->obs : NULL;  double lambda = model->lambda;  LOGODDS logodds, logodds1;  ACCURACY *acc=NULL;  char *cons = model->cons;  int alength = dataset->alength;  double *back = dataset->back;  int imotif = model->imotif-1;			/* index of motif */  double thresh;				/* Bayes optimal threshold */  FILE *outfile = stdout;  /* get entropy and E-value of motif */  calc_entropy(model, dataset);  /* get p-value and E-value of motif */  exp10_logx(model->logpv/log(10.0), m1, e1, 1);  exp10_logx(model->logev/log(10.0), m2, e2, 1);  /* print the significant models */  if (VERBOSE) {    print_candidates(candidates, dataset, max_w);  }  /* print the results for the model as a whole */  if (pair || blend) {			/* negative motifs */    int n_nsites_dis = neg_model->nsites_dis;    printf( "\n\n%s\nMOTIF%3d\twidth = %3d\tsites = %4d\tnegative sites = %4d",      stars, model->imotif, w, nsites_dis, n_nsites_dis);  } else {    printf("\n\n%s\nMOTIF%3d\twidth = %4d   sites = %3d   ",      stars, model->imotif, w, nsites_dis);  }  printf("llr = %.0f   E-value = %3.1fe%+04.0f\n%s\n", model->llr, m2, e2,     stars);    if (VERBOSE) {    printf("p-value = %3.1fe%+04.0f   E-value = %3.1fe%+04.0f\n%s\n",       m1, e1, m2, e2, stars);  }  /* print results for motif */  if (VERBOSE) {    char *cons0 = model->cons0;    printf("\n(best) %s --> %s\n", cons0, cons);    printf("(best) w %3d nsites %5.1f lambda %8.7f IC/col %6.3f\n",      w, lambda*wps(dataset, w), lambda, model->rel);    printf("\n(best) IC %6.3f\n\n", w * model->rel);  }  /*     print motif description  */  for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fputc('\n', outfile);  fprintf(outfile, "\tMotif %d Description\n", model->imotif);  for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fputc('\n', outfile);  /* print the simplified (1-digit) motif */  /* make the log-odds matrices */  /* calculate the optimal threshold (min classification error or Bayes' */  /* print the IC of the motif as a bar graph */  if (blend) {			/* single blended motif */    /* get mixing parameter */    double q = get_q(NSTEPS, WINDOW, w, theta, neg_theta, back,      dataset, neg_dataset, str_space);    /* get threshold and performance of combined motif */    logodds = make_log_odds(theta, neg_theta, back, q, w, alength);    acc = get_thresh(w, logodds, NULL, dataset, neg_dataset, FALSE);    thresh = acc->thresh;    myfree(acc);    printf("\t\tPOSITIVE MOTIF\n");    print_theta(0, 2, model->nsites_dis, obs, w, model->logev,       str_space, dataset, stdout);    printf("\t\tNEGATIVE MOTIF\n");    print_theta(0, 2, neg_model->nsites_dis, neg_obs, w, model->logev,       str_space, dataset, stdout);    print_entropy(neg_model, dataset, str_space, stdout);  } else if (pair) {			/* pair of motifs, pos and neg */    logodds = make_log_odds(theta, NULL, back, 0, w, alength);    logodds1 = make_log_odds(neg_theta, NULL, back, 0, w, alength);    thresh = LOG2((1-lambda)/lambda);	/* Bayes' threshold */    printf("\t\tPOSITIVE MOTIF\n");    print_theta(0, 2, model->nsites_dis, obs, w, model->logev,       str_space, dataset, stdout);    print_entropy(model, dataset, str_space, stdout);    printf("\t\tNEGATIVE MOTIF\n");    print_theta(0, 2, neg_model->nsites_dis, neg_obs, w, model->logev,       str_space, dataset, stdout);    print_entropy(neg_model, dataset, str_space, stdout);    myfree(logodds1);    /* combine the matrices by appending P/N to P/B */    logodds1 = make_log_odds(theta, neg_theta, NULL, 1, w, alength);    Resize(logodds, 2*w, double *);    for (i=0; i<w; i++) logodds[w+i] = logodds1[i];    myfree(logodds1);  } else {				/* no negative motifs */    logodds = make_log_odds(theta, NULL, back, 0, w, alength);    thresh = LOG2((1-lambda)/lambda);	/* Bayes' threshold */    print_theta(0, 2, model->nsites_dis, obs, w, model->logev,       str_space, dataset, stdout);    print_entropy(model, dataset, str_space, stdout);  }  for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fprintf(outfile, "\n\n");   /* create an LO structure and store it in the local array */  los[imotif] = create_lo(model->imotif, w, alength, logodds, pair, thresh);    /* create a table of p-values and store it in the array */  los[imotif]->alen--;			/* ignore X since freq isn't set right*/  /*pv[imotif] = calc_cdf(los[imotif], RANGE, dataset->back);*/  pv[imotif] = calc_pssm_cdf(los[imotif]->w, los[imotif]->alen, RANGE, los[imotif]->logodds, dataset->back);  los[imotif]->alen++;  /* score the sites and sort by position p-value */  score_sites(dataset, model, los[imotif], pv[imotif]);  /* print alignment of the sites */  align_sites(dataset, model, los[imotif], pv[imotif], stdout);  /* print diagrams of the sites */  print_site_diagrams(dataset, model, model->imotif, los, stdout);  /* print the sites "making up" the model */  if (pair || blend) {    print_sites(dataset, model, PRINT_FASTA, " (positive motif)", stdout);    print_sites(neg_dataset, neg_model, PRINT_FASTA, " (negative motif)",       stdout);  } else {    print_sites(dataset, model, PRINT_FASTA, "", stdout);  }  /* print the logodds matrix */  print_log_odds(model->imotif, dataset, w, pair, logodds, thresh,model->logev);  /* print the observed frequency matrix */  print_theta(model->imotif, 1, model->nsites_dis, obs, w, model->logev,     str_space, dataset, stdout);  if (pair || blend)     print_theta(model->imotif, 1, neg_model->nsites_dis, neg_obs, w,       model->logev, str_space, dataset, stdout);  /*     print a regular expression corresponding to motif  */  for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fputc('\n', outfile);  fprintf(outfile, "\tMotif %d regular expression\n",     model->imotif);  for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fputc('\n', outfile);  fprintf(outfile, "%s\n", get_regexp(model, dataset, 0));  for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fprintf(outfile, "\n\n");   /* display elapsed time */  printf("\n\n\nTime %5.2f secs.\n\n", myclock()/1E6);  /* print line of stars */  fprintf(stdout, "%s\n", stars);  /* flush */  fflush(stdout);} /* print_results *//**********************************************************************//*	create_lo	Create an an LO structure from the logodds matrix;	include 'X' in the alphabet.*//**********************************************************************/static LO *create_lo(  int name,				/* name of motif */  int w, 				/* width of motif */  int alen,				/* length of alphabet */  LOGODDS logodds,			/* single-letter logodds matrix */  BOOLEAN pair,  double threshold			/* Bayes optimal threshold */){  int i, j;  LO *lo = NULL;			/* the LO structure */    /* create a logodds structure */  Resize(lo, 1, LO);  /* initialize it */  lo->alen = alen+1;			/* add 'X' column */  lo->w = lo->ws = w;  lo->pair = pair;  lo->name = name;  lo->thresh = threshold;  /* make a copy of the logodds matrix and scale it to [0..range] */  create_2array(lo->logodds, LOGODDSB, w, lo->alen);  for (i=0; i<w; i++) for(j=0; j<lo->alen; j++) lo->logodds(i,j) = logodds(i,j);  scale_lo(&lo, 1, RANGE);				/* scale */  make_double_lo(&lo, 1);	/* make a double-letter logodds matrix */  return(lo);} /* create_lo *//**********************************************************************/

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -