📄 display.c
字号:
/* print_block_diagrams Tile the dataset sequences with the motifs in los[] and print the block diagrams with the p-value of the product of p-values.*//**********************************************************************/static void print_block_diagrams( MODEL *model, /* the model */ DATASET *dataset, /* the dataset */ LO *los[MAXG], /* logodds structures for motifs */ int nmotifs, /* number of motifs */ double *pv[MAXG], /* p-value tables for each motif */ FILE *outfile){ int i; BOOLEAN dna = dataset->dna; /* dataset is DNA */ BOOLEAN invcomp = model->invcomp; /* use reverse complement strand */ STYPE stype = dna ? (invcomp ? Combine : Norc) : Protein; BOOLEAN xlate_dna = FALSE; /* don't translate DNA */ BOOLEAN best_motifs = FALSE; /* use all motifs */ double m_thresh = 1e-4; /* show motifs over p-value 0.0001 */ double w_thresh = m_thresh; /* show strong motifs only */ BOOLEAN use_seq_p = FALSE; /* use postion p-values */ char *f = "%-*s%s %8s %s\n"; /* format */ for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fputc('\n', outfile); fprintf(outfile, "\tCombined block diagrams:"); fprintf(outfile, " non-overlapping sites with p-value < %6.4f\n", m_thresh); for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fputc('\n', outfile); fprintf(outfile, f, MSN, "SEQUENCE NAME", "", "COMBINED P-VALUE", "MOTIF DIAGRAM"); fprintf(outfile, f, MSN, "-------------", "", "----------------", "-------------"); for (i=0; i<dataset->n_samples; i++) { SAMPLE *s = dataset->samples[i]; char *name = s->sample_name; int lseq = s->length; char *sequence = s->seq; TILING tiling = score_tile_diagram(sequence, lseq, los, nmotifs, dna, stype, xlate_dna, best_motifs, TRUE, pv, m_thresh, w_thresh, use_seq_p, FALSE, NULL); fprintf(outfile, "%-*.*s %16.2e %s\n", MSN, MSN, name, tiling.pv, tiling.diagram); free_tiling(tiling); } /* sequence */ /* print a final line of hyphens */ for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fprintf(outfile, "\n\n"); } /* print_block_diagrams *//**********************************************************************//* print_log_odds Print the log-odds matrix *//**********************************************************************/static void print_log_odds( int imotif, /* motif number */ DATASET *dataset, /* the dataset */ int w, /* width of motif */ BOOLEAN pair, /* double matrix if true */ LOGODDS logodds, /* log-odds matrix */ double bayes, /* threshold */ double log_ev /* log E-value of motif */){ int i, j; int alength = dataset->alength; /* length of alphabet */ int n = wps(dataset, w); /* weighted possible sites */ char *type = pair ? "pair" : ""; /* type of matrix */ double m1, e1; /* for printing significance */ FILE *outfile = stdout; /* get E-value of motif */ exp10_logx(log_ev/log(10.0), m1, e1, 1); /* double w if printing a matrix pair */ if (pair) w *=2; for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fprintf(outfile, "\n\tMotif %d position-specific scoring matrix\n", imotif); for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fprintf(outfile, "\n"); fprintf(outfile, "log-odds matrix: alength= %d w= %d n= %d bayes= %g E= %3.1fe%+04.0f %s\n", alength, w, n, bayes, m1, e1, type); for (i=0; i < w; i++) { /* site position */ for (j=0; j < alength; j++) { /* letter */ fprintf(outfile, "%6d ", NINT(100*logodds(i,j))); } fprintf(outfile, "\n"); } for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fprintf(outfile, "\n\n");} /* print_log_odds *//**********************************************************************//* print_entropy Display the relative entropy of each column of the motif as a bar graph.*//**********************************************************************/static void print_entropy( MODEL *model, /* the model */ DATASET *dataset, /* the dataset */ char *str_space, /* space for printing strand direction */ FILE *outfile /* stream for output */){ int i, j; int w = model->w; /* width of motif */ THETA obs = model->obs; /* observed frequencies */ double *rentropy = model->rentropy; /* IC of each column */ double ic = model->rel * w; /* motif information content */ double *back = dataset->back; /* background model */ int alength = dataset->alength; char icstring[15]; /* print string for ic */ char *consensus; /* consensus strings */ double min_freq; /* minimum background freq */ double maxre; /* maximum relative entropy */ int nsteps; /* number of steps histogram */ /* get minimum background frequency and maximum relative entropy */ for (i=0, min_freq=1; i<alength; i++) if (back[i] < min_freq) min_freq = back[i]; maxre = -LOG2(min_freq); /* maximum relative entropy */ /* create string containing IC */ sprintf(icstring, "(%.1f bits)", ic); /* print the relative entropy of each column as a bar graph */ nsteps = 10; for (i=0; i<nsteps; i++) { double level = maxre - (i * maxre/nsteps); fprintf(outfile, (i==0 ? "%*.*s %*.1f " : "%-*.*s %*.1f "), IND, IND, (i==0 ? "bits" : i==4 ? "Information" : i==5 ? "content" : i==6 ? icstring : ""), IND2, level); for (j=0; j<w; j++) { if (NINT(nsteps * rentropy[j] / maxre) >= nsteps-i) { fputc('*', outfile); } else { fputc(' ', outfile); } } fputc('\n', outfile); } fprintf(outfile, "%-*.*s %*.1f ", IND, IND, "", IND2,0.0); for (i=0; i<w; i++) fputc('-', outfile); fprintf(outfile, "\n\n"); /* get and print the consensus sequences */ consensus = get_consensus(obs, w, dataset, MAXDEPTH, MINCONS); for (i=0; i<MAXDEPTH && i<alength; i++) {/* print next levels of consensus */ fprintf(outfile, "%-*.*s %*.0s %*.*s\n", IND, IND, (i==0 ? "Multilevel" : i == 1 ? "consensus" : i == 2 ? "sequence" : ""), IND2, "", w, w, consensus+(i*w)); } /* free up space */ myfree(consensus);} /* print_entropy *//**********************************************************************//* print_theta format=1 floating point; pos x letter format=2 1 digit; letter x pos Print the probability array.*//**********************************************************************/extern void print_theta( int imotif, /* number of motif */ int format, /* 1 = floating point 2 = integer */ int nsites, /* number of sites (discrete) */ THETA theta, /* theta */ int w, /* width of motif */ double log_ev, /* log motif E-value */ char *str_space, /* space for printing strand direction */ DATASET *dataset, /* the dataset */ FILE *outfile /* file to print to */){ int i, j; int alength = dataset->alength; if (format == 1) { double e1, m1; exp10_logx(log_ev/log(10.0), m1, e1, 3); for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fprintf(outfile, "\n\tMotif %d position-specific probability matrix\n", imotif); for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fprintf(outfile, "\nletter-probability matrix: alength= %d w= %d nsites= %d E= %3.1fe%+04.0f ", alength, w, nsites, m1, e1); fprintf(outfile, "\n"); for (i=0; i < w; i++) { for (j=0; j < alength; j++) { fprintf(outfile, "%9.6f ", theta(i, j)); } fprintf(outfile, "\n"); } for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fprintf(outfile, "\n"); } else if (format == 2) { /* print theta: rows = letter; cols = position in motif; 1-digit integer */ for (i=0; i < alength; i++) { /* print the letter */ fprintf(outfile, "%-*.*s%*c ", IND, IND, (i==0 ? "Simplified" : i==1 ? "pos.-specific" : i==2 ? "probability" : i==3 ? "matrix" : "" ), IND2, unhash(i)); for (j=0; j < w; j++) { int k = NINT(10.0 * theta(j,i)); /* round to 1 digit */ if (k == 0) { fprintf(outfile, ":"); /* print 0 as colon */ } else { fprintf(outfile, "%1x", k); /* print 1 digit */ } } fprintf(outfile, "\n"); } } fprintf(outfile, "\n");}/**********************************************************************//* print_zij*//**********************************************************************/extern void print_zij( DATASET *dataset, /* the dataset */ MODEL *model /* the model */){ int i, j, k; int n_samples = dataset->n_samples; SAMPLE **samples = dataset->samples; FILE *out=stdout; fprintf(out, "z_ij: lambda=%f ll=%f\n", model->lambda, model->ll); for (i=0; i<n_samples; i++) { /* sequence */ int lseq = samples[i]->length; int w = model->w; fprintf(out, ">%s\nz : ", samples[i]->sample_name); for (j=0; j<lseq-w+1; j++) { /* position */ int zij = NINT(10 * samples[i]->z[j]); /* round z */ fprintf(out, "%1x", zij); /*fprintf(out, "%11.8g ", samples[i]->z[j]);*/ } fprintf(out, "\n"); /* print sz if more than one strand */ if (model->invcomp) { for (k=0; k<2; k++) { printf("s%d: ", k); for (j=0; j<lseq-w+1; j++) { /* position */ int sijk = NINT(10 * samples[i]->sz[k][j]); /* round sz */ printf("%1x", sijk); } /* site */ printf("\n"); } /* strand */ } } /* sequence */ printf("\n");} /* print_zij *//**********************************************************************//* print_wij*//**********************************************************************/extern void print_wij( DATASET *dataset /* the dataset */){ int i,j; int n_samples = dataset->n_samples; SAMPLE **samples = dataset->samples; printf("w_ij:\n"); for (i=0; i<n_samples; i++) { /* sequence */ int len = samples[i]->length; double *weights = samples[i]->weights; printf(">%s\n", samples[i]->sample_name); for (j=0; j<len; j++) { /* position */ int w = NINT(10 * weights[j]); printf("%1x", w); } printf("\n"); } printf("\n");}/**********************************************************************//* get_consensus Get the consensus string from a motif. For each position, N consensus letters are found. If no letter has probability > min_prob, 'x' is written for the first consensus letter and ' ' in the others. Otherwise, N letters are written in decreasing order of probability until one with min_prob is reached, and then ' ' is written for the remaining letters.*//**********************************************************************/extern char *get_consensus( THETA theta, /* motif theta */ int w, /* width of motif */ DATASET *dataset, /* the dataset */ int N, /* number of letters for each position */ double min_prob /* minimum cumulative prob for N letters */){ int i, j, n; int alength = dataset->alength; char *alphabet = dataset->alphabet; char *string = NULL; Resize(string, w*N+2, char); for (i=0; i < w; i++) { /* position in motif */ int maxj[MAXDEPTH]; /* array of max indices in Theta */ /* find N letters at position i with highest probability (in order) */ for (n = 0; n < N; n++) { /* current depth */ double max = LITTLE; /* current max probability */ for (j=0; j < alength; j++) { /* letter */ if (theta(i, j) > max) { max = theta(i, j); /* maximum probability */ maxj[n] = j; /* current letter with n-th best prob */ } } theta(i, maxj[n]) = -theta(i, maxj[n]); /* tag this position as used */ } /* restore theta */ for (n = 0; n < N; n++) { /* current depth */ theta(i, maxj[n]) = -theta(i, maxj[n]); /* untag */ } /* set up the consensus strings for position i */ for (n = 0; n < N; n++) { /* current depth */ if (theta(i, maxj[n]) < min_prob) { string[(n*w)+i] = (n==0 ? 'x' : ' '); /* below cutoff */ } else { string[(n*w)+i] = alphabet[maxj[n]]; /* set n'th consensus */ } } } string[((N-1)*w)+i] = '\0'; /* terminate string */ return string;} /* get_consensus *//**********************************************************************//* get_regexp Get a regular expression using the same rules as for the multilevel consensus sequence.*//**********************************************************************/static char *get_regexp( MODEL *model, /* the model */ DATASET *dataset, /* the dataset */ int prosite /* prosite ==1 [AK]-E-[EG]. ==0 [AK]E[EG] */){ THETA obs = model->obs; /* motif observed theta */ int w = model->w; /* width of motif */ int i, j; char *pcons = get_consensus(obs, w, dataset, MAXDEPTH, MINCONS); char *re = NULL; /* RE string to return */ /* regular expression string */ Resize(re, w*(MAXDEPTH+6), char); /* create the regular expression from the "packed" consensus */ int pos = 0; for (i=0; i<w; i++) { /* see if there is more than one letter in consensus for this column */ if (i>0 && prosite==1) re[pos++] = '-'; if (MAXDEPTH > 1 && pcons[w + i] != ' ') re[pos++] = '['; for (j=0; j<MAXDEPTH; j++) { /* copy consensus */ char a = pcons[w*j + i]; if (a == ' ') break; re[pos++] = a; } if (MAXDEPTH > 1 && pcons[w + i] != ' ') re[pos++] = ']'; } if (prosite==1) re[pos++] = '.'; re[pos++] = '\0'; return re;} /* get_regexp *//**********************************************************************//* print_candidates Print the candidate models found.*//**********************************************************************/static void print_candidates( CANDIDATE *candidates, /* list of candidate models */ DATASET *dataset, /* the dataset */ int max_w /* maximum width for motifs */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -