background.c

来自「EM算法的改进」· C语言 代码 · 共 569 行 · 第 1/2 页

C
569
字号
/* * $Id: background.c 1339 2006-09-21 19:46:28Z tbailey $ *  * $Log$ * Revision 1.1  2005/07/25 23:49:10  nadya * Initial revision * *//*****************************************************************************                                                                           **       MEME++                                                              **       Copyright 1994-1999, The Regents of the University of California    **       Author: Timothy L. Bailey                                           **                                                                           *****************************************************************************/#include "macros.h"#include "background.h"#include "hash_alph.h"/* index from ASCII to integer and back */static int a2i_index[MAXASCII];static int i2a_index[MAXASCII];static int index_alen;				/* length of alphabet */#define a2i(a) a2i_index[(int)(a)]#define i2a(i) i2a_index[(int)(i)]static int setup_index(  char *alpha					/* the alphabet */);static int s2i(  char *string					/* the string to index */);static char *check_prob(  BOOLEAN add_x,				/* add x-tuples if TRUE */  double  *a_p,					/* tuple->prob assoc. array */  int n,					/* widest tuple */  char *tuple,					/* call with "" */  int tuplew,					/* tuple width; call with 0 */  char *alpha 					/* alphabet */); static void average_rc(  BOOLEAN add_x,				/* add x-tuples if TRUE */  double *a_p,					/* tuple->prob assoc. array */  int n,					/* widest tuple */  char *tuple,					/* call with "" */  int tuplew,					/* tuple width; call with 0 */  char *alpha 					/* alphabet */);static void add_x_tuples(  double *a_p,					/* tuple->prob assoc. array */  char *ntuple,					/* non-X-containing tuple */  int ntuplew, 					/* ntuple width */  int ntuplei, 					/* ntuple index */  char *xtuple,					/* X-containing tuple */  int xtuplew, 					/* xtuple width */  int xcnt					/* count of X's in xtuple */);static void print_prob(  double *a_p,					/* tuple->prob assoc. array */  int n,					/* widest tuple */  char *tuple,					/* call with "" */  int tuplew,					/* tuple width; call with 0 */  char *alpha 					/* alphabet */); static double *get_cond_prob (  double *a_p, 					/* tuple->prob array */  int n						/* length of array */);/***************************************************************************//*	read_markov_model	Read a file of tuple probabilities defining a Markov model.  	Check that all required tuples are present.  	File must have format:		[<tuple> <p>]+	and may have comment lines beginning with "#" in column 1.	If the file is NULL and freq is not, uses the 0-order	frequencies in freq as the model, after adding X if requested.	Sets the order of the model.	Returns array: 		cp[s2i(wa)] = Pr(a | w)	where "a" is a character and "w" is a string.	The 0-order probabilities are in positions 0..alength-1 of the	array.*//***************************************************************************/extern double *read_markov_model(   char *pfile, 					/* name of probability file */  double *freq,					/* letter frequencies */  char *alpha,					/* alphabet expected */  BOOLEAN add_x,				/* add x-tuples if TRUE */  BOOLEAN rc,					/* average reverse complements*/  int *order					/* order of model read */) {  int i;					/* index into array */  double a_p[MAX_BACK_SIZE];			/* tuple-prob array */  double *a_cp=NULL; 				/* conditional prob. array */  FILE *pfilep;					/* file pointer to file */  char *line=NULL;				/* line buffer */  char **fields=NULL;				/* fields of line */  int nfields;					/* number of fields in line */  int line_no=0;				/* line number */  char *tuple;					/* the tuple */  double p;					/* the probability */  int maxw=0;					/* maximum tuple width */  int alen=strlen(alpha);			/* length of alphabet */  int ntuples;					/* number of tuples */  /* check input */  if (!pfile && !freq) {    fprintf(stderr, "read_markov_model error: specify pfile or freq\n");    exit(1);  }  /* add 'X 'to the alphabet if requested */  if (add_x) {    char *tmp = NULL;    Resize(tmp, alen+2, char);    strcpy(tmp, alpha);    tmp[alen] = 'X'; tmp[alen+1] = '\0';    alpha = tmp;    alen++;   }  /* setup the mapping from ascii to integer and back */  setup_index(alpha);  /* use the frequencies if given */  if (freq) {					/* frequencies given */    Resize(a_cp, alen, double);    for (i=0; i<alen-add_x; i++) RND(freq[i], 8, a_cp[i]);    if (add_x) a_cp[i] = 1.0;			/* Pr(X) */    /* averate reverse complement probabilities together if requested */    if (rc) average_rc(add_x, a_cp, 1, "", 0, alpha);     return(a_cp);  }  /* initialize probability array */  for (i=0; i<MAX_BACK_SIZE; i++) a_p[i] = -1;  /* read in the probabilities and save indexed by uppercase tuple name */  if (!(pfilep = fopen(pfile, "r"))) {    fprintf(stderr, "Unable to open file %s for reading.\n", pfile);    exit(1);  }  /*fprintf(stderr, "Reading background probabilities...\n");*/  while (1) {					/* read file */    int len, index;    line_no++;    Getline(pfilep, line, len);			/* read next line */    if (!line) break;				/* at EOF */    if (line[0] == '#') continue;		/* skip comments */    Split(line, fields, nfields);		/* get tuple and prob */    if (nfields != 2) {      fprintf(stderr,         "Formatting error in file %s line %d: %s\n", pfile, line_no, line);      exit(1);    }    tuple = fields[0];    p = atof(fields[1]);    if (p<0 || p>1) {      fprintf(stderr, "Illegal probability in file %s line %d: %s\n",         pfile, line_no, line);    }    len = strlen(tuple);    maxw = MAX(len, maxw);    index = s2i(tuple);    if (index < 0) {      fprintf(stderr, "Illegal character in word `%s' in file %s line %d: %s\n",        tuple, pfile, line_no, line);      exit(1);    }    if (index >= MAX_BACK_SIZE) {      for (i=1, ntuples=0; i<=maxw; i++) ntuples+= pow(alen, i);      fprintf(stderr, "Background model too large.  Use smaller model or increase \nMAX_BACK_SIZE to at least %d in background.h and recompile.\n", ntuples);      exit(1);    }    a_p[index] = p;				/* store probability */  }  fclose(pfilep);  /* check that all necessary probabilities are defined */  /*fprintf(stderr,     "Checking that all necessary probabilities are defined...\n");  */  tuple = check_prob(add_x, a_p, maxw, "", 0, alpha);   if (tuple) {     fprintf(stderr, "File %s gives no probability for %s.\n", pfile,       tuple);    exit(1);  }  /* averate reverse complement probabilities together if requested */  if (rc) average_rc(add_x, a_p, maxw, "", 0, alpha);   *order = maxw - 1;				/* order of Markov model */  /* get conditional probabilities */  for (i=1, ntuples=0; i<=maxw; i++) ntuples+= pow(alen, i);  a_cp = get_cond_prob(a_p, ntuples);  /* print the probabilities */#ifdef DEBUG  print_prob(a_cp, maxw, "", 0, alpha);#endif  return(a_cp);					/* return conditionals */} /* read_markov_model *//***************************************************************************//*  	check_prob  	Recursively check that all probabilities are defined for all tuples	up to width n.  Add their probabilities to all the X-containing	tuples they match. 	Returns NULL if OK, otherwise the missing tuple. *//***************************************************************************/static char *check_prob(  BOOLEAN add_x,				/* add x-tuples if TRUE */  double *a_p,					/* tuple->prob assoc. array */  int n,					/* widest tuple */  char *tuple,					/* call with "" */  int tuplew,					/* tuple width; call with 0 */  char *alpha 					/* alphabet */) {  int i;  char *t = NULL, *x = NULL; 			/* tuple and x-tuple */  char *missing;				/* missing tuple */  int ti;					/* index of tuple */  if (n==0) return(NULL);			/* everything is OK */  Resize(t, tuplew+2, char);  Resize(x, tuplew+2, char);    for(i=0; alpha[i+add_x]; i++) {		/* ignore last letter (X) */    /* append letter to tuple */    strcpy(t, tuple); t[tuplew] = alpha[i]; t[tuplew+1] = '\0';    /* check that tuple exists */    ti = s2i(t);				/* index of tuple */    if (a_p[ti] < 0) return(t);    /* add the current tuple's probability to matching X-containing tuples */    if (add_x) add_x_tuples(a_p, t, tuplew+1, ti, x, 0, 0);    /* recur */    missing = check_prob(add_x, a_p, n-1, t, tuplew+1, alpha);    if (missing) return(missing);  } /* letter */  myfree(t);  if (add_x) myfree(x);  return(NULL);					/* all found */} /* check_prob *//***************************************************************************//*  	average_rc  	Recursively average the probabilities of reverse complement tuples. *//***************************************************************************/static void average_rc(  BOOLEAN add_x,				/* add x-tuples if TRUE */  double *a_p,					/* tuple->prob assoc. array */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?