⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 display.c

📁 EM算法的改进
💻 C
📖 第 1 页 / 共 4 页
字号:
){  int i, j;  double *roc = NULL;				/* array to hold roc(q) */  double smooth_roc;				/* smoothed value of roc */  double best_roc;				/* maximum roc */  double q=0;					/* mixing parameter */  double incr = 1.0/nsteps;			/* increment for q */  LOGODDS logodds;				/* motif log-odds matrix */  ACCURACY *acc;				/* get_thresh struct */  int alength = dataset->alength;		/* length of alphabet */  /* create ROC array */  Resize(roc, nsteps+1, double);  /*     get roc for different values of q   */  for (i=0; i<=nsteps; i++) {    q = i * incr;    logodds = make_log_odds(theta, neg_theta, back, q, w, alength);    acc = get_thresh(w, logodds, NULL, dataset, neg_dataset, FALSE);    roc[i] = acc->roc;				/* save roc for this q */    myfree(acc);				/* free up space */  } /* get roc */  /*     smooth ROC and find q that gives maximum  */  best_roc = 0;  for (i=0; i<=nsteps; i++) {    double avg = 0;    int cnt = 0;    for (j=MAX(0,i-window); j<=MIN(nsteps, i+window); j++) {      avg += roc[j];      cnt++;    }    smooth_roc = avg/cnt;    if (smooth_roc > best_roc) {      best_roc = smooth_roc;      q = i * incr;    }    /*printf("q= %8.3f smoothed_roc= %8.5f\n", i*incr, smooth_roc);*/  } /* smooth ROC and get max */  myfree(roc);					/* release space */  printf("Q= %8.3f ROC= %8.3f\n", q, best_roc);   return q;} /**********************************************************************//*	print_sites	Print the sites making up the model.*//**********************************************************************/static void print_sites(  DATASET *dataset,			/* the dataset */  MODEL *model,				/* the model */  int format,				/* 0=BLOCKS; 1=FASTA */  char *com,				/* comment to append */  FILE *outfile				/* where to print */){  int i, j;  int w = model->w;			/* width of motif */  P_PROB sites = model->maxima;		/* sites "defining" model */  int n = model->nsites_dis;		/* number of sites */  char *ftype = (format==0 ? "BLOCKS" : "FASTA");  /* print header */  for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fputc('\n', outfile);  fprintf(outfile, "\tMotif %d in %s format%s\n", model->imotif, ftype, com);  for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fputc('\n', outfile);  /* print the sites */  if (format == 0) fprintf(outfile, "BL   MOTIF %d width=%d seqs=%d\n",     model->imotif, w, n);  for (i=0; i<n; i++) {			/* print each site */    int seqno = sites[i].x;		/* sequence number */    SAMPLE *s = dataset->samples[seqno];/* sequence */    BOOLEAN ic = sites[i].ic;		/* strand direction */    int y = sites[i].y;			/* location of site */    int off = ic ? s->length-w-y : y;	/* - strand offset from rgt. */    char *res = ic ? s->resic+off : s->res+off;       /* integer sequence */    double weight = s->sw;		/* sequence weight */    /*double weight = sites[i].prob;*/    /* print sequence name and position of site */    if (format == 0) {			/* BLOCKS format */      fprintf(outfile, "%-*.*s ( %4d) ", MSN, MSN, s->sample_name, y+1);    } else {				/* FASTA format */      fprintf(outfile,">%-*.*s pos %4d\n", MSN, MSN, s->sample_name, y+1);    }    /* print site */    for (j=0; j<w; j++) { fputc(unhash(res[j]), outfile); }    if (format == 0) {			/* BLOCKS format */      fprintf(outfile, "  %g ", weight);    }    fputc('\n', outfile);  } /* print each site */  if (format == 0) {    fprintf(outfile, "//\n\n");  }  for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile);  fprintf(outfile, "\n\n");} /* print_sites *//**********************************************************************//*	print_summary	Print the summary of all the motifs.*//**********************************************************************/extern void print_summary(  MODEL *model,				/* the model */  DATASET *dataset,			/* the dataset */  LO **los,				/* the LO structures */  int nmotifs,				/* number of motifs */  double **pv,				/* p-value of score distribution */  FILE *outfile				/* where to print */){  /* print the motif block diagrams using all the motifs */  fprintf(outfile, "\n\n%s\nSUMMARY OF MOTIFS\n%s\n\n", stars, stars);  print_block_diagrams(model, dataset, los, nmotifs, pv, outfile);  fprintf(outfile, "%s\n\n", stars);} /* print_summary *//**********************************************************************//*	score_sites	Score and get the pvalues of the sites in the model.	Sort in order of increasing p-value.*//**********************************************************************/static void score_sites(  DATASET *dataset,			/* the dataset */  MODEL *model,				/* the model */  LO *lo,				/* LO structure */  double *pv 				/* p-values for scores of this motif */){  int isite;  P_PROB sites = model->maxima;		/* sites "defining" model */  int n = model->nsites_dis;		/* number of sites */  BOOLEAN invcomp = model->invcomp;	/* use reverse complement strand, too */  STYPE stype = invcomp ? Combine : Protein;	/* Protein works for DNA too */  SCORE **scores = NULL;		/* the site scores */  int old_seqno = -1;  for (isite=0; isite<n; isite++) {	/* site */    int seqno = sites[isite].x;		/* sequence number */    int y = sites[isite].y;		/* location of site */    SAMPLE *s = dataset->samples[seqno];/* sequence */    int lseq = s->length;		/* length of sequence */    char *seq = s->seq;			/* the ascii sequence */    double pvalue; 			/* score p-value */     /* score the sequence if new */    if (old_seqno != seqno) {      BOOLEAN xlate_dna = FALSE;	/* not xlating */      if (old_seqno >= 0) free_2array(scores, 1);      scores = score_sequence(stype, xlate_dna, seq, lseq, 1, &lo);      old_seqno = seqno;      s->minpv = 1.0;    }    pvalue = pv[(int) scores[0][y].score];	/* p-value */    /* save MINUS the p-value in the .prob field of sites */    sites[isite].prob = -pvalue;    /* update minimum p-value of sites */    if (pvalue < s->minpv) s->minpv = pvalue;  } /* get p-values of sites */  free_2array(scores, 1);               /* free space */  /*    sort the sites by p-value  */  qsort((char *) sites, n, sizeof(p_prob), pY_compare);  /*    change sign of p-values back  */  for (isite=0; isite<n; isite++) sites[isite].prob *= -1;} /* score_sites *//**********************************************************************//*	print_site_diagrams	Make block diagrams of the actual sites in the model	and print them.	Sequences are sorted by the minimum p-value of sites in them.*//**********************************************************************/static void print_site_diagrams(  DATASET *dataset,			/* the dataset */  MODEL *model,				/* the model */  int nmotifs,				/* number of motifs in los */  LO *los[MAXG],			/* logodds structure for motif */  FILE *outfile				/* where to print */){  int i, j, isite;  P_PROB sites = model->maxima;		/* sites "defining" model */  int n = model->nsites_dis;		/* number of sites */  int nseqs = dataset->n_samples;	/* number of sequences in dataset */  BOOLEAN dna = dataset->dna;		/* dataset is DNA if true */  BOOLEAN invcomp = model->invcomp;	/* use reverse complement strand, too */  BOOLEAN xlate_dna = FALSE;		/* don't translate */  BOOLEAN best_motifs = FALSE;		/* use all sites */  double m_thresh = 1;			/* show all sites as strong */  STYPE stype = dna ? (invcomp ? Combine : Norc) : Protein;  int nseqs_with_sites;			/* number of sequences with sites */  int *seqlist = NULL;			/* ordered list of sequences w/sites */  int *hits = NULL;			/* store hits */  double *pvalues = NULL;		/* store pvalues */  char *f = "%-*s%s %8s  %s\n";		/* format */  /*    Create the list to contain sequences sorted by minimum p-value  */  Resize(seqlist, nseqs, int);  /*    Clear list of sites for each sequence   */  for (i=0; i<nseqs; i++) dataset->samples[i]->nsites = 0;  /*    Find which sequences have sites and create list of sites for each  */  for (isite=nseqs_with_sites=0; isite<n; isite++) {	/* site */    int seqno = sites[isite].x;		/* sequence number */    int y = sites[isite].y + 1;		/* location of site (plus 1) */    int ic = sites[isite].ic;		/* site on reverse complement strand */    SAMPLE *s = dataset->samples[seqno];/* sequence */    /* record the sequence as containing a site */    if (!s->nsites) seqlist[nseqs_with_sites++] = seqno;    /* store the site in its list of sites */    Resize(s->sites, s->nsites+1, int);    s->sites[(s->nsites)++] = ic ? -y : y;	/* +/- site offset by 1 */  } /* site */  for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fputc('\n', outfile);  fprintf(outfile, "\tMotif %d block diagrams\n", nmotifs);  for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fputc('\n', outfile);  fprintf(outfile, f, MSN, "SEQUENCE NAME", "", "POSITION P-VALUE", "MOTIF DIAGRAM");  fprintf(outfile, f, MSN, "-------------", "", "----------------", "-------------");  /*    create and print a block diagram for each sequence with sites   */  for (i=0; i<nseqs_with_sites; i++) {	/* sequence */    int seqno = seqlist[i];		/* current sequence */    SAMPLE *s = dataset->samples[seqno];/* sequence */    int lseq = s->length;		/* length of sequence */    char *name = s->sample_name;	/* name of sequence */    double minpv = s->minpv;		/* minimum p-value of sites */    char hdr[MSN+20];			/* header */    TILING tiling;			/* tiling struct */    /*      create storage for hits and pvalues and clear them    */    Resize(hits, lseq, int);    Resize(pvalues, lseq, double);    for (j=0; j<lseq; j++) { hits[j] = 0; pvalues[j] = 0; }    /* copy hits from s->nsites into hits array */    for (j=0; j<s->nsites; j++) {      int y = abs(s->sites[j]) - 1;	/* position of site */      int m = (s->sites[j] > 0) ? los[nmotifs-1]->name : -los[nmotifs-1]->name;      hits[y] = m;			/* +/- motif */    }    /* put the hits in TILING struct */    tiling.hits = hits;    tiling.pvalues = pvalues;    /* create the block diagram */    tiling.diagram = create_diagram(dna, stype, xlate_dna, best_motifs, FALSE,      m_thresh, nmotifs, los, lseq, FALSE, NULL, tiling);    /* print the diagram */    sprintf(hdr, "%-*.*s %16.2g  ", MSN, MSN, name, minpv);    print_diagram(tiling.diagram, hdr, outfile);    myfree(tiling.diagram);		/* release space */  } /* sequence */  /* print a final line of hyphens */  for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fprintf(outfile, "\n\n");    myfree(seqlist);  myfree(hits);  myfree(pvalues);} /* print_site_diagrams *//**********************************************************************//*	align_sites	Align all sites that make up the model.*//**********************************************************************/static void align_sites(  DATASET *dataset,			/* the dataset */  MODEL *model,				/* the model */  LO *lo,				/* LO structure */  double *pv,				/* pvalues for scores of this motif */  FILE *outfile				/* stream for output */){  int i, ii, isite;  int w = model->w;			/* length of site */  P_PROB sites = model->maxima;		/* sites "defining" model */  int n = model->nsites_dis;		/* number of sites */  BOOLEAN invcomp = model->invcomp;	/* use reverse complement strand, too */  int imotif = lo->name;		/* name of motif */  char site[MAXSITE+1], pre[10+1], post[10+1];  /* print header */  for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fputc('\n', outfile);  fprintf(outfile,    "\tMotif %d sites sorted by position p-value\n", imotif);  for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fputc('\n', outfile);  fprintf(outfile, "%-*.*s%s ", MSN, MSN, "Sequence name",     invcomp ? " Strand" : "");  fprintf(outfile, "%6s %9s %10s %*sSite%*s\n",     "Start", "P-value", "", w/2 - 2, "", w - w/2 - 4, "");  fprintf(outfile, "%-*.*s%s ", MSN, MSN, "-------------",     invcomp ? " ------" : "");  fprintf(outfile, "%6s %8s %10s ", "-----", "---------", "");  for (i=0; i<w; i++) fputc('-', outfile);  fputc('\n', outfile);  /*     print sites that make up the model  */  for (isite=0; isite<n; isite++) {	/* site */    int seqno = sites[isite].x;		/* sequence number */    int y = sites[isite].y;		/* location of site */    BOOLEAN ic = sites[isite].ic;	/* strand direction */    double pvalue = sites[isite].prob;	/* position p-value */    SAMPLE *s = dataset->samples[seqno];/* sequence */    int lseq = s->length;		/* length of sequence */    char *seq = s->seq;			/* the ascii sequence */    char *sample_name = s->sample_name;	/* name of sample */    /* print name and strand */    fprintf(outfile, "%-*.*s%s ", MSN, MSN, sample_name,       invcomp ? (ic ? "     -" : "     +") : "");    /* print position and position p-value */    fprintf(outfile, "%6d %9.2e", y+1, pvalue);    /* get the aligned sequence parts */    if (!ic) {				/* + strand */      /* pre */      for (i=y-10, ii=0; i<y; i++) {	if (i<0) continue;	pre[ii++] = seq[i];      }      pre[ii] = '\0';      /* site */      for (i=y, ii=0; ii<w; i++)  site[ii++] = seq[i];       site[ii] = '\0';      /* post */      for (i=y+w, ii=0; ii<10 && i<lseq; i++) post[ii++] = seq[i];      post[ii] = 0;    } else {				/* - strand */      /* pre */      for (i=y+w+9, ii=0; i>=y+w; i--) {	if (i>=lseq) continue;	pre[ii++] = comp_dna(seq[i]);      }      pre[ii] = '\0';      /* site */      for (i=y+w-1, ii=0; ii<w; i--) site[ii++] = comp_dna(seq[i]);      site[ii] = '\0';      /* post */      for (i=y-1, ii=0; ii<10 && i>=0; i--) post[ii++] = comp_dna(seq[i]);      post[ii] = '\0';    } /* strand */    /* print the alignment */    if (pre[0] == '\0') {			/* print a dot in empty pre */      fprintf(outfile, " %10s %-*s %-10s\n", ".", w, site, post);    } else {      fprintf(outfile, " %10s %-*s %-10s\n", pre, w, site, post);    }  } /* site */  /* print line of hyphens */  for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fprintf(outfile, "\n\n");} /* align_sites */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -