display.c

来自「EM算法的改进」· C语言代码 · 共 1,677 行 · 第 1/4 页
1,677 行
/*	print_block_diagrams	Tile the dataset sequences with the motifs in los[] and print	the block diagrams with the p-value of the product of p-values.*//**********************************************************************/static void print_block_diagrams(  MODEL *model,				/* the model */  DATASET *dataset,			/* the dataset */  LO *los[MAXG],			/* logodds structures for motifs */  int nmotifs, 				/* number of motifs */  double *pv[MAXG],			/* p-value tables for each motif */  FILE *outfile){  int i;  BOOLEAN dna = dataset->dna;		/* dataset is DNA */  BOOLEAN invcomp = model->invcomp;	/* use reverse complement strand */  STYPE stype = dna ? (invcomp ? Combine : Norc) : Protein;  BOOLEAN xlate_dna = FALSE;		/* don't translate DNA */  BOOLEAN best_motifs = FALSE;		/* use all motifs */  double m_thresh = 1e-4;		/* show motifs over p-value 0.0001 */  double w_thresh = m_thresh;		/* show strong motifs only */  BOOLEAN use_seq_p = FALSE;		/* use postion p-values */  char *f = "%-*s%s %8s  %s\n";		/* format */  for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fputc('\n', outfile);  fprintf(outfile, "\tCombined block diagrams:");  fprintf(outfile, " non-overlapping sites with p-value < %6.4f\n", m_thresh);  for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fputc('\n', outfile);  fprintf(outfile, f, MSN, "SEQUENCE NAME", "", "COMBINED P-VALUE", "MOTIF DIAGRAM");  fprintf(outfile, f, MSN, "-------------", "", "----------------", "-------------");  for (i=0; i<dataset->n_samples; i++) {    SAMPLE *s = dataset->samples[i];    char *name = s->sample_name;    int lseq = s->length;    char *sequence = s->seq;    TILING tiling = score_tile_diagram(sequence, lseq, los, nmotifs,      dna, stype, xlate_dna, best_motifs, TRUE, pv, m_thresh, w_thresh,       use_seq_p, FALSE, NULL);    fprintf(outfile, "%-*.*s %16.2e  %s\n", MSN, MSN, name, tiling.pv,      tiling.diagram);    free_tiling(tiling);  } /* sequence */  /* print a final line of hyphens */  for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile); fprintf(outfile, "\n\n");  } /* print_block_diagrams *//**********************************************************************//*	print_log_odds	Print the log-odds matrix *//**********************************************************************/static void print_log_odds(  int imotif,			/* motif number */   DATASET *dataset,		/* the dataset */  int w,			/* width of motif */  BOOLEAN pair,			/* double matrix if true */  LOGODDS logodds,		/* log-odds matrix */  double bayes,  		/* threshold */  double log_ev			/* log E-value of motif */){  int i, j;  int alength = dataset->alength;	/* length of alphabet */  int n = wps(dataset, w);		/* weighted possible sites */  char *type = pair ? "pair" : "";	/* type of matrix */  double m1, e1;			/* for printing significance */  FILE *outfile = stdout;  /* get E-value of motif */  exp10_logx(log_ev/log(10.0), m1, e1, 1);  /* double w if printing a matrix pair */  if (pair) w *=2;  for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile);  fprintf(outfile, "\n\tMotif %d position-specific scoring matrix\n", imotif);  for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile);  fprintf(outfile, "\n");  fprintf(outfile,     "log-odds matrix: alength= %d w= %d n= %d bayes= %g E= %3.1fe%+04.0f %s\n",     alength, w, n, bayes, m1, e1, type);  for (i=0; i < w; i++) {		/* site position */    for (j=0; j < alength; j++) {	/* letter */      fprintf(outfile, "%6d ", NINT(100*logodds(i,j)));    }    fprintf(outfile, "\n");  }  for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile);  fprintf(outfile, "\n\n");} /* print_log_odds *//**********************************************************************//*	print_entropy		Display the relative entropy of each column of the motif	as a bar graph.*//**********************************************************************/static void print_entropy(  MODEL *model,			/* the model */  DATASET *dataset,		/* the dataset */  char *str_space,		/* space for printing strand direction */  FILE *outfile			/* stream for output */){  int i, j;  int w = model->w;				/* width of motif */  THETA obs = model->obs;			/* observed frequencies */  double *rentropy = model->rentropy;		/* IC of each column */  double ic = model->rel * w;			/* motif information content */  double *back = dataset->back;			/* background model */  int alength = dataset->alength;  char icstring[15];				/* print string for ic */  char *consensus;				/* consensus strings */  double min_freq;				/* minimum background freq */  double maxre;					/* maximum relative entropy */  int nsteps;					/* number of steps histogram */  /* get minimum background frequency and maximum relative entropy */  for (i=0, min_freq=1; i<alength; i++)      if (back[i] < min_freq) min_freq = back[i];  maxre = -LOG2(min_freq);		/* maximum relative entropy */    /* create string containing IC */  sprintf(icstring, "(%.1f bits)", ic);   /* print the relative entropy of each column as a bar graph */  nsteps = 10;  for (i=0; i<nsteps; i++) {    double level = maxre - (i * maxre/nsteps);    fprintf(outfile, (i==0 ? "%*.*s %*.1f " : "%-*.*s %*.1f "), IND, IND,       (i==0 ? "bits" : i==4 ? "Information" : i==5 ? "content" :         i==6 ? icstring : ""), IND2, level);    for (j=0; j<w; j++) {      if (NINT(nsteps * rentropy[j] / maxre) >= nsteps-i) {        fputc('*', outfile);      } else {        fputc(' ', outfile);      }    }    fputc('\n', outfile);  }  fprintf(outfile, "%-*.*s %*.1f ", IND, IND, "", IND2,0.0);  for (i=0; i<w; i++) fputc('-', outfile);  fprintf(outfile, "\n\n");  /* get and print the consensus sequences */  consensus = get_consensus(obs, w, dataset, MAXDEPTH, MINCONS);  for (i=0; i<MAXDEPTH && i<alength; i++) {/* print next levels of consensus */    fprintf(outfile, "%-*.*s %*.0s %*.*s\n", IND, IND,       (i==0 ? "Multilevel" : i == 1 ? "consensus" : i == 2 ? "sequence" : ""),       IND2, "", w, w, consensus+(i*w));  }  /* free up space */  myfree(consensus);} /* print_entropy *//**********************************************************************//*	print_theta		format=1		floating point; pos x letter		format=2		1 digit; letter x pos 	Print the probability array.*//**********************************************************************/extern void print_theta(  int imotif,		/* number of motif */  int format,		/* 1 = floating point              		   2 = integer */   int nsites,		/* number of sites (discrete) */  THETA theta,		/* theta */  int w,		/* width of motif */  double log_ev,	/* log motif E-value */  char *str_space,	/* space for printing strand direction */  DATASET *dataset,	/* the dataset */   FILE *outfile	 	/* file to print to */){  int i, j;  int alength = dataset->alength;  if (format == 1) {    double e1, m1;    exp10_logx(log_ev/log(10.0), m1, e1, 3);    for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile);    fprintf(outfile, "\n\tMotif %d position-specific probability matrix\n",       imotif);    for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile);    fprintf(outfile,      "\nletter-probability matrix: alength= %d w= %d nsites= %d E= %3.1fe%+04.0f ",      alength, w, nsites, m1, e1);    fprintf(outfile, "\n");    for (i=0; i < w; i++) {      for (j=0; j < alength; j++) {	fprintf(outfile, "%9.6f ", theta(i, j));      }      fprintf(outfile, "\n");    }    for (i=0; i<PAGEWIDTH; i++) fputc('-', outfile);    fprintf(outfile, "\n");  } else if (format == 2) {    /* print theta: rows = letter; cols = position in motif; 1-digit integer */    for (i=0; i < alength; i++) {      /* print the letter */      fprintf(outfile, "%-*.*s%*c  ", IND, IND,         (i==0 ? "Simplified" : i==1 ? "pos.-specific" : i==2 ? 	  "probability" : i==3 ? "matrix" : "" ), IND2, unhash(i));	      for (j=0; j < w; j++) {	int k = NINT(10.0 * theta(j,i));	/* round to 1 digit */        if (k == 0) {          fprintf(outfile, ":");		/* print 0 as colon */        } else {          fprintf(outfile, "%1x", k);		/* print 1 digit */        }      }      fprintf(outfile, "\n");    }  }  fprintf(outfile, "\n");}/**********************************************************************//*	print_zij*//**********************************************************************/extern void print_zij(  DATASET *dataset,			/* the dataset */  MODEL *model				/* the model */){  int i, j, k;  int n_samples = dataset->n_samples;  SAMPLE **samples = dataset->samples;  FILE *out=stdout;  fprintf(out, "z_ij: lambda=%f ll=%f\n", model->lambda, model->ll);  for (i=0; i<n_samples; i++) {			/* sequence */    int lseq = samples[i]->length;    int w = model->w;    fprintf(out, ">%s\nz : ", samples[i]->sample_name);    for (j=0; j<lseq-w+1; j++) {		/* position */      int zij = NINT(10 * samples[i]->z[j]);	/* round z */      fprintf(out, "%1x", zij);      /*fprintf(out, "%11.8g ", samples[i]->z[j]);*/    }    fprintf(out, "\n");    /* print sz if more than one strand */    if (model->invcomp) {      for (k=0; k<2; k++) {        printf("s%d: ", k);	for (j=0; j<lseq-w+1; j++) {		/* position */	  int sijk = NINT(10 * samples[i]->sz[k][j]);	/* round sz */	  printf("%1x", sijk);	} /* site */        printf("\n");      } /* strand */    }  } /* sequence */  printf("\n");} /* print_zij *//**********************************************************************//*	print_wij*//**********************************************************************/extern void print_wij(  DATASET *dataset			/* the dataset */){  int i,j;  int n_samples = dataset->n_samples;  SAMPLE **samples = dataset->samples;  printf("w_ij:\n");  for (i=0; i<n_samples; i++) {               /* sequence */    int len = samples[i]->length;    double *weights = samples[i]->weights;    printf(">%s\n", samples[i]->sample_name);    for (j=0; j<len; j++) {                   /* position */      int w = NINT(10 * weights[j]);      printf("%1x", w);    }    printf("\n");  }  printf("\n");}/**********************************************************************//*      get_consensus        Get the consensus string from a motif.	For each position, N consensus letters are found.	If no letter has probability > min_prob,         'x' is written for the first consensus	letter and ' ' in the others.        Otherwise, N letters are written in decreasing order of	probability until one with min_prob is reached, and then ' ' is 	written for the remaining letters.*//**********************************************************************/extern char *get_consensus(  THETA theta,			/* motif theta */  int w,			/* width of motif */  DATASET *dataset,		/* the dataset */  int N,			/* number of letters for each position */  double min_prob		/* minimum cumulative prob for N letters */){  int i, j, n;  int alength = dataset->alength;  char *alphabet = dataset->alphabet;  char *string = NULL;  Resize(string, w*N+2, char);  for (i=0; i < w; i++) {		/* position in motif */    int maxj[MAXDEPTH];			/* array of max indices in Theta */    /* find N letters at position i with highest probability (in order) */    for (n = 0; n < N; n++) {		/* current depth */	      double max = LITTLE;		/* current max probability */      for (j=0; j < alength; j++) {	/* letter */	if (theta(i, j) > max) {	  max = theta(i, j);		/* maximum probability */	  maxj[n] = j;			/* current letter with n-th best prob */	}      }      theta(i, maxj[n]) = -theta(i, maxj[n]);	/* tag this position as used */    }     /* restore theta */    for (n = 0; n < N; n++) {		/* current depth */	      theta(i, maxj[n]) = -theta(i, maxj[n]);   /* untag */	    }    /* set up the consensus strings for position i */    for (n = 0; n < N; n++) { 			/* current depth */	      if (theta(i, maxj[n]) < min_prob) {        string[(n*w)+i] = (n==0 ? 'x' : ' ');	/* below cutoff */      } else {         string[(n*w)+i] = alphabet[maxj[n]]; 	/* set n'th consensus */      }    }  }  string[((N-1)*w)+i] = '\0';			/* terminate string */  return string;} /* get_consensus *//**********************************************************************//*      get_regexp	Get a regular expression using the same rules as for	the multilevel consensus sequence.*//**********************************************************************/static char *get_regexp(  MODEL *model,			/* the model */  DATASET *dataset,		/* the dataset */  int prosite                   /* prosite ==1 [AK]-E-[EG]. ==0 [AK]E[EG] */){  THETA obs = model->obs;	/* motif observed theta */  int w = model->w;		/* width of motif */  int i, j;  char *pcons = get_consensus(obs, w, dataset, MAXDEPTH, MINCONS);  char *re = NULL;		/* RE string to return */  /* regular expression string */  Resize(re, w*(MAXDEPTH+6), char);  /* create the regular expression from the "packed" consensus */  int pos = 0;  for (i=0; i<w; i++) {    /* see if there is more than one letter in consensus for this column */     if (i>0 && prosite==1) re[pos++] = '-';    if (MAXDEPTH > 1 && pcons[w + i] != ' ') re[pos++] = '[';    for (j=0; j<MAXDEPTH; j++) {	/* copy consensus */      char a = pcons[w*j + i];      if (a == ' ') break;      re[pos++] = a;    }    if (MAXDEPTH > 1 && pcons[w + i] != ' ') re[pos++] = ']';  }  if (prosite==1) re[pos++] = '.';  re[pos++] = '\0';        return re;} /* get_regexp *//**********************************************************************//*	print_candidates	Print the candidate models found.*//**********************************************************************/static void print_candidates(  CANDIDATE *candidates, 	/* list of candidate models */  DATASET *dataset,		/* the dataset */  int max_w			/* maximum width for motifs */
display.c - 源码说明

本页面展示了「EM算法的改进」中的 display.c 源码文件，采用 C语言编程语言编写，共 1,677 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与法的改进相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?