display.c

来自「EM算法的改进」· C语言代码 · 共 1,677 行 · 第 1/4 页
1,677 行
){  int i, w;  int hdr = 35;  int tail = PAGEWIDTH - hdr;  double m, e;			/* for printing signficance */  printf("\nCandidate motifs:\n");  printf("width nsites  ll        signif     consensus\n");  printf("----- ------  --------- ---------- ---------\n");  for (w=1; w<=max_w; w++) {    if (candidates[w].sig > 1) continue;	/* skip unused candidates */    exp10_logx(candidates[w].sig/log(10.0), m, e, 3);    printf ("%5d %6.1f %c%9.0f %5.3fe%+04.0f ",       w,       candidates[w].lambda * wps(dataset, w),       (candidates[w].pal ? 'P' : ' '),      candidates[w].ll,      m, e);    printf("%-*.*s\n", tail, tail, candidates[w].cons);    for (i=tail; i < w; i+=tail) printf("%*.*s%-*.*s\n",       hdr, hdr, "", tail, tail, candidates[w].cons+i);  }} /* print_candidates *//**********************************************************************//*	print_dataset_summary*//**********************************************************************/extern void print_dataset_summary (  DATASET *dataset 			/* structure containing sequences */){  int i, pcol;  int w = MSN + 15;  char *datafile = dataset->datafile;	/* name of the training set file */  char *alphabet = dataset->alphabet;	/* alphabet of sequences */  char *negfile	= dataset->negfile;	/* name of negative example file */  /* set up printing spacers */  Resize(stars, PAGEWIDTH+1, char);  for (i=0; i<PAGEWIDTH; i++) {    stars[i] = '*';  }  stars[PAGEWIDTH] = '\0';  /* announce the training set */  printf("%s\nTRAINING SET\n%s\n", stars, stars);  /* print name of file and alphabet */  printf("DATAFILE= %s\n""ALPHABET= %s\n", datafile, alphabet  );  /* print name of negative dataset */  if (negfile){ printf("NEGATIVES= %s\n", negfile); }  /*    print a table of sequence lengths   */  /*   print table header */  for (pcol = w; pcol < 80; pcol += w) {    printf("%-*.*s %6s %6s%2s", MSN, MSN,       "Sequence name", "Weight", "Length", " ");  }  printf("\n");  for (pcol = w; pcol < 80; pcol += w) {    printf("%-*.*s %6s %6s%2s", MSN, MSN,       "-------------", "------", "------", " ");  }  printf("\n");  /*   print table columns */  pcol = 0;   for (i=0; i<dataset->n_samples; i++) {    SAMPLE *sample = dataset->samples[i];    char *sample_name = sample->sample_name;    double wgt = sample->sw;    int lseq = sample->length;    /* print the sample name and its length */    pcol += w;          /* new line for print out? */    if (pcol >= 80) { printf("\n"); pcol = w; }    printf("%-*.*s %6.4f %6d%2s", MSN, MSN, sample_name, wgt, lseq, " ");  }  /* finish section */  printf("\n%s\n\n", stars);} /* print_dataset_summary *//**********************************************************************//*	print_command_summary	Print the command line summary*//**********************************************************************/extern void print_command_summary(  MODEL *model,			/* the model */  DATASET *dataset 		/* the dataset */){  int i, pcol;  char evt_string[12];  if (dataset->evt == BIG) {    strcpy(evt_string, "inf");  } else {    sprintf(evt_string, "%8g", dataset->evt);  }  fprintf(stdout, "%s\nCOMMAND LINE SUMMARY\n%s\n""This information can also be useful in the event you wish to report a\n""problem with the MEME software.\n\n""command: %s\n\n""model:  mod=      %8s    nmotifs=  %8d    evt=      %8s\n""object function=  %s\n""width:  minw=     %8d    maxw=     %8d    minic=    %8.2f\n",    stars, stars,    dataset->command,     dataset->mod, dataset->nmotifs, evt_string,    (dataset->objfun == Pv ?        "P-value of product of p-values" :        "E-value of product of p-values"),    model->min_w, model->max_w, dataset->min_ic);  if (dataset->ma_adj) {    fprintf(stdout,"width:  wg=       %8g    ws=       %8g    endgaps=  %8s\n",      dataset->wg, dataset->ws, yesno[dataset->endgaps]);  }  fprintf(stdout,"nsites: minsites= %8g    maxsites= %8g    wnsites=  %8g\n""theta:  prob=     %8g    spmap=    %8s    spfuzz=   %8g\n" "em:     prior=   %9s    b=        %8g    maxiter=  %8d\n""        distance= %8g\n""data:   n=        %8d    N=        %8d\n",     model->min_nsites, model->max_nsites, dataset->wnsites,    dataset->prob, dataset->mapname, dataset->map_scale,    dataset->priorname, dataset->beta, dataset->maxiter,    dataset->distance,    dataset->total_res, dataset->n_samples  );  if (!strcmp(dataset->alphabet, DNA0)) {    fprintf(stdout, "strands: +");    if (model->invcomp) fprintf(stdout, " -");  }  fprintf(stdout,"\n""sample: seed=     %8d    seqfrac=  %8g\n", dataset->seed, dataset->seqfrac);  if (dataset->plib_name) {    fprintf(stdout, "Dirichlet mixture priors file: %s\n", dataset->plib_name);  }  /* print dataset frequencies of letters in alphabet */  fprintf(stdout, "Letter frequencies in dataset:\n");  for (i=0, pcol=0; i<dataset->alength; i++) {    pcol += 8;          /* start of next printed thing */    if (pcol >= PAGEWIDTH) {pcol=8; fprintf(stdout, "\n");}    fprintf(stdout, "%c %5.3f ", dataset->alphabet[i], dataset->res_freq[i]);  }  /* print background frequencies of letters in alphabet */  fprintf(stdout, "\nBackground letter frequencies (from %s):\n",    dataset->bfile ? dataset->bfile : "dataset with add-one prior applied");  for (i=0, pcol=0; i<dataset->alength; i++) {    pcol += 8;          /* start of next printed thing */    if (pcol >= PAGEWIDTH) {pcol=8; fprintf(stdout, "\n");}    fprintf(stdout, "%c %5.3f ", dataset->alphabet[i], dataset->back[i]);  }  fprintf(stdout, "\n%s\n", stars);} /* print_command_summary *//**********************************************************************//*	print_meme_doc	Print documentation for MEME.	Note: file meme.doc is taken by cut-and-paste from 	meme-output.html.*//**********************************************************************/extern void print_meme_doc(  char *meme_directory			/* meme source directory */){  char *name = NULL;  int s1;  FILE *doc;  /* make the name of the meme documentation file */  s1 = strlen(meme_directory);  Resize(name, s1 + 10, char);  strcpy(name, meme_directory);  strcpy(name + s1, "/etc/meme.doc");  /* open the documentation file */  doc = fopen(name, "r");  if (doc == NULL) {    fprintf(stderr, "Unable to open MEME's documentation file `%s'.\n", name);    exit(1);  }  printf("\n");  PSTARS;  printf("EXPLANATION OF RESULTS\n");  PSTARS;  /* copy the documentation file to standard output */  cpyfile(doc, stdout);  PSTARS;} /* print_meme_doc *//**********************************************************************//*	meme_score_sequence	Compute the sequence score for a motif.	Returns the sequence score.*//**********************************************************************/static double meme_score_sequence(  char *eseq,		/* integer-coded sequence to score */  int length,		/* length of the sequence */  int w, 		/* width of motif */  LOGODDS logodds1, 	/* log-odds matrix: LOG2(m_ij/b_j) */  LOGODDS logodds2  	/* log-odds matrix: LOG2(m_ij/n_ij) */){  int i, j;  double best = LITTLE;			/* sequence score */  double score, sc1, sc2;  double loge2 = log(2);  /* score the sequence with motif */  for (i=0; i <= length - w; i++) {	/* site start */    /* calculate score of subsequence */    for (j=0, sc1=0, sc2=0; j<w; j++) {	/* position in sequence */      sc1 += logodds1(j, (int) eseq[i+j]);      if (logodds2) sc2 += logodds2(j, (int) eseq[i+j]);    } /* subsequence */    score = logodds2 ? -LOGL_SUM(-sc1*loge2, -sc2*loge2)/loge2 : sc1;    best = MAX(score, best);  } /* site start */  return best;} /* meme_score_sequence *//**********************************************************************//*	get_thresh	Get the optimal threshold for minimizing classification error	by classifying positive and negative data using the motif, 	sorting and finding the minimum error.	Returns optimal threshold, error rate and ROC.*//**********************************************************************/static ACCURACY *get_thresh(  int w,				/* width of motif */  LOGODDS logodds1,			/* log-odds matrix: LOG2(m_ij/b_j) */  LOGODDS logodds2,			/* log-odds matrix: LOG2(m_ij/a_ij) */  DATASET *pos,				/* positive examples */  DATASET *neg,				/* negative examples */  BOOLEAN print_scores			/* print sorted scores */){  int i, class, iseq;  int err;				/* sum of false pos. and false neg. */  int min_pos, max_pos;			/* best cutoff index in sorted list */  int best_err;				/* best classification error */  double thresh;			/* best threshold */  SORTED_SCORE *scores=NULL;		/* array of class/score */  int npos = pos->n_samples;  int nneg = neg->n_samples;  int nseqs = npos + nneg;		/* number of sequences */  DATASET *dataset;  /* for ROC */  double roc;				/* receiver operating characteristic */  double tpp, fpp;			/* true, false positive proportions */  double tp, fp;			/* true, false positives so far */  double newtpp, newfpp;  ACCURACY *acc = NULL;  double minposscore;			/* minimum score of a positive */   double maxnegscore;			/* maximum score of a negative */   /* allocate space for accuracy record */  Resize(acc, 1, ACCURACY);   /* allocate space for scores */  Resize(scores, nseqs, SORTED_SCORE);  /* score sequences */   for (class=0, iseq = 0; class<2; class++) {    if (class) dataset = pos; else dataset = neg;    for (i=0; i<dataset->n_samples; i++) {      SAMPLE *s = dataset->samples[i];	/* sample */      char *eseq = s->res;			/* integer-coded sequence */      int lseq = s->length;			/* length of sequence */      if (lseq < w) continue;			/* sequence too short */      scores[iseq].class = class;      scores[iseq].score = meme_score_sequence(eseq, lseq, w, logodds1,         logodds2);      scores[iseq].id = s->sample_name;      iseq++;    }  }  /* sort sequences by score in descending order */  qsort(scores, nseqs, sizeof(SORTED_SCORE), s_compare);  /*     get threshold with minimum classification error     If a range of thresholds give the same error, choose the average.  */  roc = tpp = fpp = tp = fp = 0 ;	/* for ROC */  best_err = err = pos->n_samples;	/* all false negatives */  min_pos = 0;				/* threshold must be below this */  max_pos = 0;				/* threshold must be above this */  minposscore = BIG;			/* smallest score of positives */  maxnegscore = -BIG;			/* smallest score of negatives */  for (i=0; i<nseqs; i++) {    if (scores[i].class) {			/* positive */      err--;					/* one fewer fn */      tp++;      minposscore = scores[i].score;    } else {					/* negative */      err++;					/* one more fp */      fp++;      maxnegscore = MAX(maxnegscore, scores[i].score);    }    if (err < best_err) { 			/* start new range */      best_err = err;      min_pos = max_pos = i;    } else if (err == best_err) {		/* extend current range */      max_pos = i;    }    /* ROC trapezoidal rule : (y2 - y1)/2 dx */    newtpp = tp / npos;    newfpp = fp / nneg;    roc += .5 * (newtpp + tpp) * (newfpp - fpp);    tpp = newtpp;    fpp = newfpp;  }  max_pos = MIN(max_pos+1, nseqs-1);  thresh = (scores[min_pos].score + scores[max_pos].score)/2;  /* normalize by fpp to get ROC */  if (fpp == 0) {    roc = 1.0;  } else {    roc /= fpp;  }    /* add difference between positives and negatives if ROC is 1.0 */  if (roc == 1.0) roc += minposscore - maxnegscore;  /* print the sorted list */  if (print_scores) {    printf("ROC= %f\n", roc);    for (i=0; i<nseqs; i++) printf("%-*.*s %1d %g\n",       MSN, MSN, scores[i].id, scores[i].class, scores[i].score);   }    acc->thresh = thresh;  acc->err = best_err;  acc->roc = roc;  return acc;} /* get_thresh *//**********************************************************************//*        s_compare         Compare two scores in descending order.  Return <0 >0        if the first is <, > the second.  If they are equal,        resolves ties by returning <0 if the first has smaller class.*//**********************************************************************/static int s_compare(  const void *v1,  const void *v2){  const SORTED_SCORE * s1 = (const SORTED_SCORE *) v1;  const SORTED_SCORE * s2 = (const SORTED_SCORE *) v2;  double diff = s1->score - s2->score;  if (diff == 0) diff = (double) (s1->class - s2->class);  return ((diff > 0) ? -1 : ( (diff < 0) ? 1 : 0) );} /* s_compare *//**********************************************************************//*	get_q	Get the value of q which gives the optimal ROC on the training	set.*//**********************************************************************/static double get_q(  int nsteps,					/* try nsteps from 0 to 1 */  int window,					/* smoothing window radius */  int w,					/* width of motif */  THETA theta,					/* motif theta */  THETA neg_theta, 				/* anti-motif theta */  double *back,					/* background motif */  DATASET *dataset,				/* the dataset */  DATASET *neg_dataset,				/* negative examples */  char *str_space 		/* space for printing strand direction */
display.c - 源码说明

本页面展示了「EM算法的改进」中的 display.c 源码文件，采用 C语言编程语言编写，共 1,677 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与法的改进相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?