📄 display.c
字号:
){ int i, j; double *roc = NULL; /* array to hold roc(q) */ double smooth_roc; /* smoothed value of roc */ double best_roc; /* maximum roc */ double q=0; /* mixing parameter */ double incr = 1.0/nsteps; /* increment for q */ LOGODDS logodds; /* motif log-odds matrix */ ACCURACY *acc; /* get_thresh struct */ int alength = dataset->alength; /* length of alphabet */ /* create ROC array */ Resize(roc, nsteps+1, double); /* get roc for different values of q */ for (i=0; i<=nsteps; i++) { q = i * incr; logodds = make_log_odds(theta, neg_theta, back, q, w, alength); acc = get_thresh(w, logodds, NULL, dataset, neg_dataset, FALSE); roc[i] = acc->roc; /* save roc for this q */ myfree(acc); /* free up space */ } /* get roc */ /* smooth ROC and find q that gives maximum */ best_roc = 0; for (i=0; i<=nsteps; i++) { double avg = 0; int cnt = 0; for (j=MAX(0,i-window); j<=MIN(nsteps, i+window); j++) { avg += roc[j]; cnt++; } smooth_roc = avg/cnt; if (smooth_roc > best_roc) { best_roc = smooth_roc; q = i * incr; } /*printf("q= %8.3f smoothed_roc= %8.5f\n", i*incr, smooth_roc);*/ } /* smooth ROC and get max */ myfree(roc); /* release space */ printf("Q= %8.3f ROC= %8.3f\n", q, best_roc); return q;} /**********************************************************************//* print_sites Print the sites making up the model.*//**********************************************************************/static void print_sites( DATASET *dataset, /* the dataset */ MODEL *model, /* the model */ int format, /* 0=BLOCKS; 1=FASTA */ char *com, /* comment to append */ FILE *outfile /* where to print */){ int i, j; int w = model->w; /* width of motif */ P_PROB sites = model->maxima; /* sites "defining" model */ int n = model->nsites_dis; /* number of sites */ char *ftype = (format==0 ? "BLOCKS" : "FASTA"); /* print header */ for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fputc('\n', outfile); fprintf(outfile, "\tMotif %d in %s format%s\n", model->imotif, ftype, com); for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fputc('\n', outfile); /* print the sites */ if (format == 0) fprintf(outfile, "BL MOTIF %d width=%d seqs=%d\n", model->imotif, w, n); for (i=0; i<n; i++) { /* print each site */ int seqno = sites[i].x; /* sequence number */ SAMPLE *s = dataset->samples[seqno];/* sequence */ BOOLEAN ic = sites[i].ic; /* strand direction */ int y = sites[i].y; /* location of site */ int off = ic ? s->length-w-y : y; /* - strand offset from rgt. */ char *res = ic ? s->resic+off : s->res+off; /* integer sequence */ double weight = s->sw; /* sequence weight */ /*double weight = sites[i].prob;*/ /* print sequence name and position of site */ if (format == 0) { /* BLOCKS format */ fprintf(outfile, "%-*.*s ( %4d) ", MSN, MSN, s->sample_name, y+1); } else { /* FASTA format */ fprintf(outfile,">%-*.*s pos %4d\n", MSN, MSN, s->sample_name, y+1); } /* print site */ for (j=0; j<w; j++) { fputc(unhash(res[j]), outfile); } if (format == 0) { /* BLOCKS format */ fprintf(outfile, " %g ", weight); } fputc('\n', outfile); } /* print each site */ if (format == 0) { fprintf(outfile, "//\n\n"); } for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fprintf(outfile, "\n\n");} /* print_sites *//**********************************************************************//* print_summary Print the summary of all the motifs.*//**********************************************************************/extern void print_summary( MODEL *model, /* the model */ DATASET *dataset, /* the dataset */ LO **los, /* the LO structures */ int nmotifs, /* number of motifs */ double **pv, /* p-value of score distribution */ FILE *outfile /* where to print */){ /* print the motif block diagrams using all the motifs */ fprintf(outfile, "\n\n%s\nSUMMARY OF MOTIFS\n%s\n\n", stars, stars); print_block_diagrams(model, dataset, los, nmotifs, pv, outfile); fprintf(outfile, "%s\n\n", stars);} /* print_summary *//**********************************************************************//* score_sites Score and get the pvalues of the sites in the model. Sort in order of increasing p-value.*//**********************************************************************/static void score_sites( DATASET *dataset, /* the dataset */ MODEL *model, /* the model */ LO *lo, /* LO structure */ double *pv /* p-values for scores of this motif */){ int isite; P_PROB sites = model->maxima; /* sites "defining" model */ int n = model->nsites_dis; /* number of sites */ BOOLEAN invcomp = model->invcomp; /* use reverse complement strand, too */ STYPE stype = invcomp ? Combine : Protein; /* Protein works for DNA too */ SCORE **scores = NULL; /* the site scores */ int old_seqno = -1; for (isite=0; isite<n; isite++) { /* site */ int seqno = sites[isite].x; /* sequence number */ int y = sites[isite].y; /* location of site */ SAMPLE *s = dataset->samples[seqno];/* sequence */ int lseq = s->length; /* length of sequence */ char *seq = s->seq; /* the ascii sequence */ double pvalue; /* score p-value */ /* score the sequence if new */ if (old_seqno != seqno) { BOOLEAN xlate_dna = FALSE; /* not xlating */ if (old_seqno >= 0) free_2array(scores, 1); scores = score_sequence(stype, xlate_dna, seq, lseq, 1, &lo); old_seqno = seqno; s->minpv = 1.0; } pvalue = pv[(int) scores[0][y].score]; /* p-value */ /* save MINUS the p-value in the .prob field of sites */ sites[isite].prob = -pvalue; /* update minimum p-value of sites */ if (pvalue < s->minpv) s->minpv = pvalue; } /* get p-values of sites */ free_2array(scores, 1); /* free space */ /* sort the sites by p-value */ qsort((char *) sites, n, sizeof(p_prob), pY_compare); /* change sign of p-values back */ for (isite=0; isite<n; isite++) sites[isite].prob *= -1;} /* score_sites *//**********************************************************************//* print_site_diagrams Make block diagrams of the actual sites in the model and print them. Sequences are sorted by the minimum p-value of sites in them.*//**********************************************************************/static void print_site_diagrams( DATASET *dataset, /* the dataset */ MODEL *model, /* the model */ int nmotifs, /* number of motifs in los */ LO *los[MAXG], /* logodds structure for motif */ FILE *outfile /* where to print */){ int i, j, isite; P_PROB sites = model->maxima; /* sites "defining" model */ int n = model->nsites_dis; /* number of sites */ int nseqs = dataset->n_samples; /* number of sequences in dataset */ BOOLEAN dna = dataset->dna; /* dataset is DNA if true */ BOOLEAN invcomp = model->invcomp; /* use reverse complement strand, too */ BOOLEAN xlate_dna = FALSE; /* don't translate */ BOOLEAN best_motifs = FALSE; /* use all sites */ double m_thresh = 1; /* show all sites as strong */ STYPE stype = dna ? (invcomp ? Combine : Norc) : Protein; int nseqs_with_sites; /* number of sequences with sites */ int *seqlist = NULL; /* ordered list of sequences w/sites */ int *hits = NULL; /* store hits */ double *pvalues = NULL; /* store pvalues */ char *f = "%-*s%s %8s %s\n"; /* format */ /* Create the list to contain sequences sorted by minimum p-value */ Resize(seqlist, nseqs, int); /* Clear list of sites for each sequence */ for (i=0; i<nseqs; i++) dataset->samples[i]->nsites = 0; /* Find which sequences have sites and create list of sites for each */ for (isite=nseqs_with_sites=0; isite<n; isite++) { /* site */ int seqno = sites[isite].x; /* sequence number */ int y = sites[isite].y + 1; /* location of site (plus 1) */ int ic = sites[isite].ic; /* site on reverse complement strand */ SAMPLE *s = dataset->samples[seqno];/* sequence */ /* record the sequence as containing a site */ if (!s->nsites) seqlist[nseqs_with_sites++] = seqno; /* store the site in its list of sites */ Resize(s->sites, s->nsites+1, int); s->sites[(s->nsites)++] = ic ? -y : y; /* +/- site offset by 1 */ } /* site */ for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fputc('\n', outfile); fprintf(outfile, "\tMotif %d block diagrams\n", nmotifs); for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fputc('\n', outfile); fprintf(outfile, f, MSN, "SEQUENCE NAME", "", "POSITION P-VALUE", "MOTIF DIAGRAM"); fprintf(outfile, f, MSN, "-------------", "", "----------------", "-------------"); /* create and print a block diagram for each sequence with sites */ for (i=0; i<nseqs_with_sites; i++) { /* sequence */ int seqno = seqlist[i]; /* current sequence */ SAMPLE *s = dataset->samples[seqno];/* sequence */ int lseq = s->length; /* length of sequence */ char *name = s->sample_name; /* name of sequence */ double minpv = s->minpv; /* minimum p-value of sites */ char hdr[MSN+20]; /* header */ TILING tiling; /* tiling struct */ /* create storage for hits and pvalues and clear them */ Resize(hits, lseq, int); Resize(pvalues, lseq, double); for (j=0; j<lseq; j++) { hits[j] = 0; pvalues[j] = 0; } /* copy hits from s->nsites into hits array */ for (j=0; j<s->nsites; j++) { int y = abs(s->sites[j]) - 1; /* position of site */ int m = (s->sites[j] > 0) ? los[nmotifs-1]->name : -los[nmotifs-1]->name; hits[y] = m; /* +/- motif */ } /* put the hits in TILING struct */ tiling.hits = hits; tiling.pvalues = pvalues; /* create the block diagram */ tiling.diagram = create_diagram(dna, stype, xlate_dna, best_motifs, FALSE, m_thresh, nmotifs, los, lseq, FALSE, NULL, tiling); /* print the diagram */ sprintf(hdr, "%-*.*s %16.2g ", MSN, MSN, name, minpv); print_diagram(tiling.diagram, hdr, outfile); myfree(tiling.diagram); /* release space */ } /* sequence */ /* print a final line of hyphens */ for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fprintf(outfile, "\n\n"); myfree(seqlist); myfree(hits); myfree(pvalues);} /* print_site_diagrams *//**********************************************************************//* align_sites Align all sites that make up the model.*//**********************************************************************/static void align_sites( DATASET *dataset, /* the dataset */ MODEL *model, /* the model */ LO *lo, /* LO structure */ double *pv, /* pvalues for scores of this motif */ FILE *outfile /* stream for output */){ int i, ii, isite; int w = model->w; /* length of site */ P_PROB sites = model->maxima; /* sites "defining" model */ int n = model->nsites_dis; /* number of sites */ BOOLEAN invcomp = model->invcomp; /* use reverse complement strand, too */ int imotif = lo->name; /* name of motif */ char site[MAXSITE+1], pre[10+1], post[10+1]; /* print header */ for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fputc('\n', outfile); fprintf(outfile, "\tMotif %d sites sorted by position p-value\n", imotif); for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fputc('\n', outfile); fprintf(outfile, "%-*.*s%s ", MSN, MSN, "Sequence name", invcomp ? " Strand" : ""); fprintf(outfile, "%6s %9s %10s %*sSite%*s\n", "Start", "P-value", "", w/2 - 2, "", w - w/2 - 4, ""); fprintf(outfile, "%-*.*s%s ", MSN, MSN, "-------------", invcomp ? " ------" : ""); fprintf(outfile, "%6s %8s %10s ", "-----", "---------", ""); for (i=0; i<w; i++) fputc('-', outfile); fputc('\n', outfile); /* print sites that make up the model */ for (isite=0; isite<n; isite++) { /* site */ int seqno = sites[isite].x; /* sequence number */ int y = sites[isite].y; /* location of site */ BOOLEAN ic = sites[isite].ic; /* strand direction */ double pvalue = sites[isite].prob; /* position p-value */ SAMPLE *s = dataset->samples[seqno];/* sequence */ int lseq = s->length; /* length of sequence */ char *seq = s->seq; /* the ascii sequence */ char *sample_name = s->sample_name; /* name of sample */ /* print name and strand */ fprintf(outfile, "%-*.*s%s ", MSN, MSN, sample_name, invcomp ? (ic ? " -" : " +") : ""); /* print position and position p-value */ fprintf(outfile, "%6d %9.2e", y+1, pvalue); /* get the aligned sequence parts */ if (!ic) { /* + strand */ /* pre */ for (i=y-10, ii=0; i<y; i++) { if (i<0) continue; pre[ii++] = seq[i]; } pre[ii] = '\0'; /* site */ for (i=y, ii=0; ii<w; i++) site[ii++] = seq[i]; site[ii] = '\0'; /* post */ for (i=y+w, ii=0; ii<10 && i<lseq; i++) post[ii++] = seq[i]; post[ii] = 0; } else { /* - strand */ /* pre */ for (i=y+w+9, ii=0; i>=y+w; i--) { if (i>=lseq) continue; pre[ii++] = comp_dna(seq[i]); } pre[ii] = '\0'; /* site */ for (i=y+w-1, ii=0; ii<w; i--) site[ii++] = comp_dna(seq[i]); site[ii] = '\0'; /* post */ for (i=y-1, ii=0; ii<10 && i>=0; i--) post[ii++] = comp_dna(seq[i]); post[ii] = '\0'; } /* strand */ /* print the alignment */ if (pre[0] == '\0') { /* print a dot in empty pre */ fprintf(outfile, " %10s %-*s %-10s\n", ".", w, site, post); } else { fprintf(outfile, " %10s %-*s %-10s\n", pre, w, site, post); } } /* site */ /* print line of hyphens */ for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fprintf(outfile, "\n\n");} /* align_sites */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -