📄 display.c
字号:
/* * $Id: display.c 1339 2006-09-21 19:46:28Z tbailey $ * * $Log$ * Revision 1.4 2006/03/08 20:50:11 nadya * merge chamges from v3_5_2 branch * * Revision 1.3.4.1 2006/01/24 20:44:08 nadya * update copyright * * Revision 1.3 2005/10/25 19:00:37 nadya * change c++ style comment to proper c * * Revision 1.2 2005/10/20 00:20:52 tbailey * *** empty log message *** * * Revision 1.1.1.1 2005/07/29 23:35:53 nadya * Importing from meme-3.0.14, and adding configure/make * *//************************************************************************ ** MEME ** Copyright 1994-2006, The Regents of the University of California ** Author: Timothy L. Bailey ** ************************************************************************//**********************************************************************//* Display routines for the results of MEME*//**********************************************************************//* 7-23-99 tlb; replace nsites() calls with model->nsites *//* 7-16-99 tlb; move SITE to meme.h, get_sites to meme_util.c *//* 7-14-99 tlb; change simplified prob. matrix to be observed frequencies, consensus to be based on observed frequencies *//* 4-7-99 tlb; fix bug in get_sites: setting of best site in oops and zoops */#include <meme.h>#include <mast.h>#include <string.h>static char *yesno[] = {"no", "yes"};static char *stars = NULL;/* number of different integral score values for computing p-values */#define RANGE 100/* stepsize and size of smoothing window for get_q */#define NSTEPS 100/*#define WINDOW (NSTEPS/10)*/#define WINDOW 0/* round double to integer; round up if half way between *//* only works for positive numbers */#ifndef NINT #define NINT(x) ((int) ((x)+0.5))#endif/* maximum number of levels in consensus sequence */#define MAXDEPTH ((int) (1.0/MINCONS))/* encode and decode a (seq #, offset) pair as one integer *//* this will bomb if seq. length > MAX_SEQ_LEN or if number of sequences * MAX_SEQ_LEN exceeds size of int */#define MAX_SEQ_LEN 10000#define ENCODE(I,J) (I) * MAX_SEQ_LEN + (J);#define DECODE(N, SEQ, OFF) {\ (OFF) = (N) % MAX_SEQ_LEN;\ (SEQ) = ((N) - (OFF)) / MAX_SEQ_LEN;\}/* distance to indent start of IC histogram, consensus and simplified motif */#define IND 13#define IND2 6/* record describing accuracy of motif */typedef struct { double thresh; /* optimal threshold */ int err; /* classification errors using thresh */ double roc; /* ROC */} ACCURACY;/* sortable sequence score record */typedef struct { double score; /* sequence score */ int class; /* class of sequence, 1=pos, 0=neg */ char *id;} SORTED_SCORE;/* local functions */static void print_sites( DATASET *dataset, /* the dataset */ MODEL *model, /* the model */ int format, /* 0=BLOCKS; 1=FASTA */ char *com, /* comment to append */ FILE *outfile /* where to print */);static void print_log_odds( int imotif, /* motif number */ DATASET *dataset, /* the dataset */ int w, /* width of motif */ BOOLEAN pair, /* double matrix if true */ LOGODDS logodds, /* log-odds matrix */ double bayes, /* threshold */ double log_ev /* log E-value of motif */);static void print_entropy( MODEL *model, /* the model */ DATASET *dataset, /* the dataset */ char *str_space, /* space for printing strand direction */ FILE *outfile /* stream for output */);static void print_candidates( CANDIDATE *candidates, /* list of candidate models */ DATASET *dataset, /* the dataset */ int max_w /* maximum width for motifs */);static ACCURACY *get_thresh( int w, /* width of motif */ LOGODDS logodds1, /* log-odds matrix: LOG2(m_ij/b_j) */ LOGODDS logodds2, /* log-odds matrix: LOG2(m_ij/a_ij) */ DATASET *pos, /* positive examples */ DATASET *neg, /* negative examples */ BOOLEAN print_scores /* print sorted scores */);static double meme_score_sequence( char *eseq, /* integer-coded sequence to score */ int length, /* length of the sequence */ int w, /* width of motif */ LOGODDS logodds1, /* log-odds matrix: LOG2(m_ij/b_j) */ LOGODDS logodds2 /* log-odds matrix: LOG2(m_ij/n_ij) */);static int s_compare( const void *v1, const void *v2);static double get_q( int nsteps, /* try nsteps from 0 to 1 */ int window, /* smoothing window radius */ int w, /* width of motif */ THETA theta, /* motif theta */ THETA neg_theta, /* anti-motif theta */ double *back, /* background motif */ DATASET *dataset, /* the dataset */ DATASET *neg_dataset, /* negative examples */ char *str_space /* space for printing strand direction */);static LO *create_lo( int name, /* name of motif */ int w, /* width of motif */ int alen, /* length of alphabet */ LOGODDS logodds, /* single-letter logodds matrix */ BOOLEAN pair, double threshold /* Bayes optimal threshold */);static void score_sites( DATASET *dataset, /* the dataset */ MODEL *model, /* the model */ LO *lo, /* LO structure */ double *pv /* p-values for scores of this motif */);static void print_site_diagrams( DATASET *dataset, /* the dataset */ MODEL *model, /* the model */ int nmotifs, /* number of motifs in los */ LO *los[MAXG], /* logodds structure for motif */ FILE *outfile /* where to print */);static void align_sites( DATASET *dataset, /* the dataset */ MODEL *model, /* the model */ LO *lo, /* LO structure */ double *pv, /* pvalues for scores of this motif */ FILE *outfile /* stream for output */);static void print_block_diagrams( MODEL *model, /* the model */ DATASET *dataset, /* the dataset */ LO *los[MAXG], /* logodds structures for motifs */ int nmotifs, /* number of motifs */ double *pv[MAXG], /* p-value tables for each motif */ FILE *outfile);static char *get_regexp( MODEL *model, /* the model */ DATASET *dataset, /* the dataset */ int prosite /* prosite ==1 [AK]-E-[EG]. ==0 [AK]E[EG] */);/**********************************************************************//* Print the results of EM*//**********************************************************************/extern void print_results( DATASET *dataset, /* the dataset */ DATASET *neg_dataset, /* negative examples */ MODEL *model, /* the model */ MODEL *neg_model, /* negative model */ CANDIDATE *candidates /* candidate models found */){ int i; int max_w = model->max_w; /* maximum width tried */ int nstrands = model->invcomp ? 2 : 1; /* # of strands to use */ int w = model->w; /* width of last component */ int nsites_dis = model->nsites_dis; /* # of sites after discretiz.*/ double m1, e1, m2, e2; /* for printing significance */ char *str_space = (nstrands == 1) ? "" : " "; BOOLEAN pair = neg_dataset ? neg_dataset->negtype == Pair : FALSE; BOOLEAN blend = neg_dataset ? neg_dataset->negtype == Blend : FALSE; THETA theta = model->theta; THETA obs = model->obs; THETA neg_theta = neg_model ? neg_model->theta : NULL; THETA neg_obs = neg_model ? neg_model->obs : NULL; double lambda = model->lambda; LOGODDS logodds, logodds1; ACCURACY *acc=NULL; char *cons = model->cons; int alength = dataset->alength; double *back = dataset->back; int imotif = model->imotif-1; /* index of motif */ double thresh; /* Bayes optimal threshold */ FILE *outfile = stdout; /* get entropy and E-value of motif */ calc_entropy(model, dataset); /* get p-value and E-value of motif */ exp10_logx(model->logpv/log(10.0), m1, e1, 1); exp10_logx(model->logev/log(10.0), m2, e2, 1); /* print the significant models */ if (VERBOSE) { print_candidates(candidates, dataset, max_w); } /* print the results for the model as a whole */ if (pair || blend) { /* negative motifs */ int n_nsites_dis = neg_model->nsites_dis; printf( "\n\n%s\nMOTIF%3d\twidth = %3d\tsites = %4d\tnegative sites = %4d", stars, model->imotif, w, nsites_dis, n_nsites_dis); } else { printf("\n\n%s\nMOTIF%3d\twidth = %4d sites = %3d ", stars, model->imotif, w, nsites_dis); } printf("llr = %.0f E-value = %3.1fe%+04.0f\n%s\n", model->llr, m2, e2, stars); if (VERBOSE) { printf("p-value = %3.1fe%+04.0f E-value = %3.1fe%+04.0f\n%s\n", m1, e1, m2, e2, stars); } /* print results for motif */ if (VERBOSE) { char *cons0 = model->cons0; printf("\n(best) %s --> %s\n", cons0, cons); printf("(best) w %3d nsites %5.1f lambda %8.7f IC/col %6.3f\n", w, lambda*wps(dataset, w), lambda, model->rel); printf("\n(best) IC %6.3f\n\n", w * model->rel); } /* print motif description */ for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fputc('\n', outfile); fprintf(outfile, "\tMotif %d Description\n", model->imotif); for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fputc('\n', outfile); /* print the simplified (1-digit) motif */ /* make the log-odds matrices */ /* calculate the optimal threshold (min classification error or Bayes' */ /* print the IC of the motif as a bar graph */ if (blend) { /* single blended motif */ /* get mixing parameter */ double q = get_q(NSTEPS, WINDOW, w, theta, neg_theta, back, dataset, neg_dataset, str_space); /* get threshold and performance of combined motif */ logodds = make_log_odds(theta, neg_theta, back, q, w, alength); acc = get_thresh(w, logodds, NULL, dataset, neg_dataset, FALSE); thresh = acc->thresh; myfree(acc); printf("\t\tPOSITIVE MOTIF\n"); print_theta(0, 2, model->nsites_dis, obs, w, model->logev, str_space, dataset, stdout); printf("\t\tNEGATIVE MOTIF\n"); print_theta(0, 2, neg_model->nsites_dis, neg_obs, w, model->logev, str_space, dataset, stdout); print_entropy(neg_model, dataset, str_space, stdout); } else if (pair) { /* pair of motifs, pos and neg */ logodds = make_log_odds(theta, NULL, back, 0, w, alength); logodds1 = make_log_odds(neg_theta, NULL, back, 0, w, alength); thresh = LOG2((1-lambda)/lambda); /* Bayes' threshold */ printf("\t\tPOSITIVE MOTIF\n"); print_theta(0, 2, model->nsites_dis, obs, w, model->logev, str_space, dataset, stdout); print_entropy(model, dataset, str_space, stdout); printf("\t\tNEGATIVE MOTIF\n"); print_theta(0, 2, neg_model->nsites_dis, neg_obs, w, model->logev, str_space, dataset, stdout); print_entropy(neg_model, dataset, str_space, stdout); myfree(logodds1); /* combine the matrices by appending P/N to P/B */ logodds1 = make_log_odds(theta, neg_theta, NULL, 1, w, alength); Resize(logodds, 2*w, double *); for (i=0; i<w; i++) logodds[w+i] = logodds1[i]; myfree(logodds1); } else { /* no negative motifs */ logodds = make_log_odds(theta, NULL, back, 0, w, alength); thresh = LOG2((1-lambda)/lambda); /* Bayes' threshold */ print_theta(0, 2, model->nsites_dis, obs, w, model->logev, str_space, dataset, stdout); print_entropy(model, dataset, str_space, stdout); } for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fprintf(outfile, "\n\n"); /* create an LO structure and store it in the local array */ los[imotif] = create_lo(model->imotif, w, alength, logodds, pair, thresh); /* create a table of p-values and store it in the array */ los[imotif]->alen--; /* ignore X since freq isn't set right*/ /*pv[imotif] = calc_cdf(los[imotif], RANGE, dataset->back);*/ pv[imotif] = calc_pssm_cdf(los[imotif]->w, los[imotif]->alen, RANGE, los[imotif]->logodds, dataset->back); los[imotif]->alen++; /* score the sites and sort by position p-value */ score_sites(dataset, model, los[imotif], pv[imotif]); /* print alignment of the sites */ align_sites(dataset, model, los[imotif], pv[imotif], stdout); /* print diagrams of the sites */ print_site_diagrams(dataset, model, model->imotif, los, stdout); /* print the sites "making up" the model */ if (pair || blend) { print_sites(dataset, model, PRINT_FASTA, " (positive motif)", stdout); print_sites(neg_dataset, neg_model, PRINT_FASTA, " (negative motif)", stdout); } else { print_sites(dataset, model, PRINT_FASTA, "", stdout); } /* print the logodds matrix */ print_log_odds(model->imotif, dataset, w, pair, logodds, thresh,model->logev); /* print the observed frequency matrix */ print_theta(model->imotif, 1, model->nsites_dis, obs, w, model->logev, str_space, dataset, stdout); if (pair || blend) print_theta(model->imotif, 1, neg_model->nsites_dis, neg_obs, w, model->logev, str_space, dataset, stdout); /* print a regular expression corresponding to motif */ for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fputc('\n', outfile); fprintf(outfile, "\tMotif %d regular expression\n", model->imotif); for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fputc('\n', outfile); fprintf(outfile, "%s\n", get_regexp(model, dataset, 0)); for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fprintf(outfile, "\n\n"); /* display elapsed time */ printf("\n\n\nTime %5.2f secs.\n\n", myclock()/1E6); /* print line of stars */ fprintf(stdout, "%s\n", stars); /* flush */ fflush(stdout);} /* print_results *//**********************************************************************//* create_lo Create an an LO structure from the logodds matrix; include 'X' in the alphabet.*//**********************************************************************/static LO *create_lo( int name, /* name of motif */ int w, /* width of motif */ int alen, /* length of alphabet */ LOGODDS logodds, /* single-letter logodds matrix */ BOOLEAN pair, double threshold /* Bayes optimal threshold */){ int i, j; LO *lo = NULL; /* the LO structure */ /* create a logodds structure */ Resize(lo, 1, LO); /* initialize it */ lo->alen = alen+1; /* add 'X' column */ lo->w = lo->ws = w; lo->pair = pair; lo->name = name; lo->thresh = threshold; /* make a copy of the logodds matrix and scale it to [0..range] */ create_2array(lo->logodds, LOGODDSB, w, lo->alen); for (i=0; i<w; i++) for(j=0; j<lo->alen; j++) lo->logodds(i,j) = logodds(i,j); scale_lo(&lo, 1, RANGE); /* scale */ make_double_lo(&lo, 1); /* make a double-letter logodds matrix */ return(lo);} /* create_lo *//**********************************************************************/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -