📄 display.c
字号:
){ int i, w; int hdr = 35; int tail = PAGEWIDTH - hdr; double m, e; /* for printing signficance */ printf("\nCandidate motifs:\n"); printf("width nsites ll signif consensus\n"); printf("----- ------ --------- ---------- ---------\n"); for (w=1; w<=max_w; w++) { if (candidates[w].sig > 1) continue; /* skip unused candidates */ exp10_logx(candidates[w].sig/log(10.0), m, e, 3); printf ("%5d %6.1f %c%9.0f %5.3fe%+04.0f ", w, candidates[w].lambda * wps(dataset, w), (candidates[w].pal ? 'P' : ' '), candidates[w].ll, m, e); printf("%-*.*s\n", tail, tail, candidates[w].cons); for (i=tail; i < w; i+=tail) printf("%*.*s%-*.*s\n", hdr, hdr, "", tail, tail, candidates[w].cons+i); }} /* print_candidates *//**********************************************************************//* print_dataset_summary*//**********************************************************************/extern void print_dataset_summary ( DATASET *dataset /* structure containing sequences */){ int i, pcol; int w = MSN + 15; char *datafile = dataset->datafile; /* name of the training set file */ char *alphabet = dataset->alphabet; /* alphabet of sequences */ char *negfile = dataset->negfile; /* name of negative example file */ /* set up printing spacers */ Resize(stars, PAGEWIDTH+1, char); for (i=0; i<PAGEWIDTH; i++) { stars[i] = '*'; } stars[PAGEWIDTH] = '\0'; /* announce the training set */ printf("%s\nTRAINING SET\n%s\n", stars, stars); /* print name of file and alphabet */ printf("DATAFILE= %s\n""ALPHABET= %s\n", datafile, alphabet ); /* print name of negative dataset */ if (negfile){ printf("NEGATIVES= %s\n", negfile); } /* print a table of sequence lengths */ /* print table header */ for (pcol = w; pcol < 80; pcol += w) { printf("%-*.*s %6s %6s%2s", MSN, MSN, "Sequence name", "Weight", "Length", " "); } printf("\n"); for (pcol = w; pcol < 80; pcol += w) { printf("%-*.*s %6s %6s%2s", MSN, MSN, "-------------", "------", "------", " "); } printf("\n"); /* print table columns */ pcol = 0; for (i=0; i<dataset->n_samples; i++) { SAMPLE *sample = dataset->samples[i]; char *sample_name = sample->sample_name; double wgt = sample->sw; int lseq = sample->length; /* print the sample name and its length */ pcol += w; /* new line for print out? */ if (pcol >= 80) { printf("\n"); pcol = w; } printf("%-*.*s %6.4f %6d%2s", MSN, MSN, sample_name, wgt, lseq, " "); } /* finish section */ printf("\n%s\n\n", stars);} /* print_dataset_summary *//**********************************************************************//* print_command_summary Print the command line summary*//**********************************************************************/extern void print_command_summary( MODEL *model, /* the model */ DATASET *dataset /* the dataset */){ int i, pcol; char evt_string[12]; if (dataset->evt == BIG) { strcpy(evt_string, "inf"); } else { sprintf(evt_string, "%8g", dataset->evt); } fprintf(stdout, "%s\nCOMMAND LINE SUMMARY\n%s\n""This information can also be useful in the event you wish to report a\n""problem with the MEME software.\n\n""command: %s\n\n""model: mod= %8s nmotifs= %8d evt= %8s\n""object function= %s\n""width: minw= %8d maxw= %8d minic= %8.2f\n", stars, stars, dataset->command, dataset->mod, dataset->nmotifs, evt_string, (dataset->objfun == Pv ? "P-value of product of p-values" : "E-value of product of p-values"), model->min_w, model->max_w, dataset->min_ic); if (dataset->ma_adj) { fprintf(stdout,"width: wg= %8g ws= %8g endgaps= %8s\n", dataset->wg, dataset->ws, yesno[dataset->endgaps]); } fprintf(stdout,"nsites: minsites= %8g maxsites= %8g wnsites= %8g\n""theta: prob= %8g spmap= %8s spfuzz= %8g\n" "em: prior= %9s b= %8g maxiter= %8d\n"" distance= %8g\n""data: n= %8d N= %8d\n", model->min_nsites, model->max_nsites, dataset->wnsites, dataset->prob, dataset->mapname, dataset->map_scale, dataset->priorname, dataset->beta, dataset->maxiter, dataset->distance, dataset->total_res, dataset->n_samples ); if (!strcmp(dataset->alphabet, DNA0)) { fprintf(stdout, "strands: +"); if (model->invcomp) fprintf(stdout, " -"); } fprintf(stdout,"\n""sample: seed= %8d seqfrac= %8g\n", dataset->seed, dataset->seqfrac); if (dataset->plib_name) { fprintf(stdout, "Dirichlet mixture priors file: %s\n", dataset->plib_name); } /* print dataset frequencies of letters in alphabet */ fprintf(stdout, "Letter frequencies in dataset:\n"); for (i=0, pcol=0; i<dataset->alength; i++) { pcol += 8; /* start of next printed thing */ if (pcol >= PAGEWIDTH) {pcol=8; fprintf(stdout, "\n");} fprintf(stdout, "%c %5.3f ", dataset->alphabet[i], dataset->res_freq[i]); } /* print background frequencies of letters in alphabet */ fprintf(stdout, "\nBackground letter frequencies (from %s):\n", dataset->bfile ? dataset->bfile : "dataset with add-one prior applied"); for (i=0, pcol=0; i<dataset->alength; i++) { pcol += 8; /* start of next printed thing */ if (pcol >= PAGEWIDTH) {pcol=8; fprintf(stdout, "\n");} fprintf(stdout, "%c %5.3f ", dataset->alphabet[i], dataset->back[i]); } fprintf(stdout, "\n%s\n", stars);} /* print_command_summary *//**********************************************************************//* print_meme_doc Print documentation for MEME. Note: file meme.doc is taken by cut-and-paste from meme-output.html.*//**********************************************************************/extern void print_meme_doc( char *meme_directory /* meme source directory */){ char *name = NULL; int s1; FILE *doc; /* make the name of the meme documentation file */ s1 = strlen(meme_directory); Resize(name, s1 + 10, char); strcpy(name, meme_directory); strcpy(name + s1, "/etc/meme.doc"); /* open the documentation file */ doc = fopen(name, "r"); if (doc == NULL) { fprintf(stderr, "Unable to open MEME's documentation file `%s'.\n", name); exit(1); } printf("\n"); PSTARS; printf("EXPLANATION OF RESULTS\n"); PSTARS; /* copy the documentation file to standard output */ cpyfile(doc, stdout); PSTARS;} /* print_meme_doc *//**********************************************************************//* meme_score_sequence Compute the sequence score for a motif. Returns the sequence score.*//**********************************************************************/static double meme_score_sequence( char *eseq, /* integer-coded sequence to score */ int length, /* length of the sequence */ int w, /* width of motif */ LOGODDS logodds1, /* log-odds matrix: LOG2(m_ij/b_j) */ LOGODDS logodds2 /* log-odds matrix: LOG2(m_ij/n_ij) */){ int i, j; double best = LITTLE; /* sequence score */ double score, sc1, sc2; double loge2 = log(2); /* score the sequence with motif */ for (i=0; i <= length - w; i++) { /* site start */ /* calculate score of subsequence */ for (j=0, sc1=0, sc2=0; j<w; j++) { /* position in sequence */ sc1 += logodds1(j, (int) eseq[i+j]); if (logodds2) sc2 += logodds2(j, (int) eseq[i+j]); } /* subsequence */ score = logodds2 ? -LOGL_SUM(-sc1*loge2, -sc2*loge2)/loge2 : sc1; best = MAX(score, best); } /* site start */ return best;} /* meme_score_sequence *//**********************************************************************//* get_thresh Get the optimal threshold for minimizing classification error by classifying positive and negative data using the motif, sorting and finding the minimum error. Returns optimal threshold, error rate and ROC.*//**********************************************************************/static ACCURACY *get_thresh( int w, /* width of motif */ LOGODDS logodds1, /* log-odds matrix: LOG2(m_ij/b_j) */ LOGODDS logodds2, /* log-odds matrix: LOG2(m_ij/a_ij) */ DATASET *pos, /* positive examples */ DATASET *neg, /* negative examples */ BOOLEAN print_scores /* print sorted scores */){ int i, class, iseq; int err; /* sum of false pos. and false neg. */ int min_pos, max_pos; /* best cutoff index in sorted list */ int best_err; /* best classification error */ double thresh; /* best threshold */ SORTED_SCORE *scores=NULL; /* array of class/score */ int npos = pos->n_samples; int nneg = neg->n_samples; int nseqs = npos + nneg; /* number of sequences */ DATASET *dataset; /* for ROC */ double roc; /* receiver operating characteristic */ double tpp, fpp; /* true, false positive proportions */ double tp, fp; /* true, false positives so far */ double newtpp, newfpp; ACCURACY *acc = NULL; double minposscore; /* minimum score of a positive */ double maxnegscore; /* maximum score of a negative */ /* allocate space for accuracy record */ Resize(acc, 1, ACCURACY); /* allocate space for scores */ Resize(scores, nseqs, SORTED_SCORE); /* score sequences */ for (class=0, iseq = 0; class<2; class++) { if (class) dataset = pos; else dataset = neg; for (i=0; i<dataset->n_samples; i++) { SAMPLE *s = dataset->samples[i]; /* sample */ char *eseq = s->res; /* integer-coded sequence */ int lseq = s->length; /* length of sequence */ if (lseq < w) continue; /* sequence too short */ scores[iseq].class = class; scores[iseq].score = meme_score_sequence(eseq, lseq, w, logodds1, logodds2); scores[iseq].id = s->sample_name; iseq++; } } /* sort sequences by score in descending order */ qsort(scores, nseqs, sizeof(SORTED_SCORE), s_compare); /* get threshold with minimum classification error If a range of thresholds give the same error, choose the average. */ roc = tpp = fpp = tp = fp = 0 ; /* for ROC */ best_err = err = pos->n_samples; /* all false negatives */ min_pos = 0; /* threshold must be below this */ max_pos = 0; /* threshold must be above this */ minposscore = BIG; /* smallest score of positives */ maxnegscore = -BIG; /* smallest score of negatives */ for (i=0; i<nseqs; i++) { if (scores[i].class) { /* positive */ err--; /* one fewer fn */ tp++; minposscore = scores[i].score; } else { /* negative */ err++; /* one more fp */ fp++; maxnegscore = MAX(maxnegscore, scores[i].score); } if (err < best_err) { /* start new range */ best_err = err; min_pos = max_pos = i; } else if (err == best_err) { /* extend current range */ max_pos = i; } /* ROC trapezoidal rule : (y2 - y1)/2 dx */ newtpp = tp / npos; newfpp = fp / nneg; roc += .5 * (newtpp + tpp) * (newfpp - fpp); tpp = newtpp; fpp = newfpp; } max_pos = MIN(max_pos+1, nseqs-1); thresh = (scores[min_pos].score + scores[max_pos].score)/2; /* normalize by fpp to get ROC */ if (fpp == 0) { roc = 1.0; } else { roc /= fpp; } /* add difference between positives and negatives if ROC is 1.0 */ if (roc == 1.0) roc += minposscore - maxnegscore; /* print the sorted list */ if (print_scores) { printf("ROC= %f\n", roc); for (i=0; i<nseqs; i++) printf("%-*.*s %1d %g\n", MSN, MSN, scores[i].id, scores[i].class, scores[i].score); } acc->thresh = thresh; acc->err = best_err; acc->roc = roc; return acc;} /* get_thresh *//**********************************************************************//* s_compare Compare two scores in descending order. Return <0 >0 if the first is <, > the second. If they are equal, resolves ties by returning <0 if the first has smaller class.*//**********************************************************************/static int s_compare( const void *v1, const void *v2){ const SORTED_SCORE * s1 = (const SORTED_SCORE *) v1; const SORTED_SCORE * s2 = (const SORTED_SCORE *) v2; double diff = s1->score - s2->score; if (diff == 0) diff = (double) (s1->class - s2->class); return ((diff > 0) ? -1 : ( (diff < 0) ? 1 : 0) );} /* s_compare *//**********************************************************************//* get_q Get the value of q which gives the optimal ROC on the training set.*//**********************************************************************/static double get_q( int nsteps, /* try nsteps from 0 to 1 */ int window, /* smoothing window radius */ int w, /* width of motif */ THETA theta, /* motif theta */ THETA neg_theta, /* anti-motif theta */ double *back, /* background motif */ DATASET *dataset, /* the dataset */ DATASET *neg_dataset, /* negative examples */ char *str_space /* space for printing strand direction */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -