background.c
来自「EM算法的改进」· C语言 代码 · 共 569 行 · 第 1/2 页
C
569 行
/* * $Id: background.c 1339 2006-09-21 19:46:28Z tbailey $ * * $Log$ * Revision 1.1 2005/07/25 23:49:10 nadya * Initial revision * *//***************************************************************************** ** MEME++ ** Copyright 1994-1999, The Regents of the University of California ** Author: Timothy L. Bailey ** *****************************************************************************/#include "macros.h"#include "background.h"#include "hash_alph.h"/* index from ASCII to integer and back */static int a2i_index[MAXASCII];static int i2a_index[MAXASCII];static int index_alen; /* length of alphabet */#define a2i(a) a2i_index[(int)(a)]#define i2a(i) i2a_index[(int)(i)]static int setup_index( char *alpha /* the alphabet */);static int s2i( char *string /* the string to index */);static char *check_prob( BOOLEAN add_x, /* add x-tuples if TRUE */ double *a_p, /* tuple->prob assoc. array */ int n, /* widest tuple */ char *tuple, /* call with "" */ int tuplew, /* tuple width; call with 0 */ char *alpha /* alphabet */); static void average_rc( BOOLEAN add_x, /* add x-tuples if TRUE */ double *a_p, /* tuple->prob assoc. array */ int n, /* widest tuple */ char *tuple, /* call with "" */ int tuplew, /* tuple width; call with 0 */ char *alpha /* alphabet */);static void add_x_tuples( double *a_p, /* tuple->prob assoc. array */ char *ntuple, /* non-X-containing tuple */ int ntuplew, /* ntuple width */ int ntuplei, /* ntuple index */ char *xtuple, /* X-containing tuple */ int xtuplew, /* xtuple width */ int xcnt /* count of X's in xtuple */);static void print_prob( double *a_p, /* tuple->prob assoc. array */ int n, /* widest tuple */ char *tuple, /* call with "" */ int tuplew, /* tuple width; call with 0 */ char *alpha /* alphabet */); static double *get_cond_prob ( double *a_p, /* tuple->prob array */ int n /* length of array */);/***************************************************************************//* read_markov_model Read a file of tuple probabilities defining a Markov model. Check that all required tuples are present. File must have format: [<tuple> <p>]+ and may have comment lines beginning with "#" in column 1. If the file is NULL and freq is not, uses the 0-order frequencies in freq as the model, after adding X if requested. Sets the order of the model. Returns array: cp[s2i(wa)] = Pr(a | w) where "a" is a character and "w" is a string. The 0-order probabilities are in positions 0..alength-1 of the array.*//***************************************************************************/extern double *read_markov_model( char *pfile, /* name of probability file */ double *freq, /* letter frequencies */ char *alpha, /* alphabet expected */ BOOLEAN add_x, /* add x-tuples if TRUE */ BOOLEAN rc, /* average reverse complements*/ int *order /* order of model read */) { int i; /* index into array */ double a_p[MAX_BACK_SIZE]; /* tuple-prob array */ double *a_cp=NULL; /* conditional prob. array */ FILE *pfilep; /* file pointer to file */ char *line=NULL; /* line buffer */ char **fields=NULL; /* fields of line */ int nfields; /* number of fields in line */ int line_no=0; /* line number */ char *tuple; /* the tuple */ double p; /* the probability */ int maxw=0; /* maximum tuple width */ int alen=strlen(alpha); /* length of alphabet */ int ntuples; /* number of tuples */ /* check input */ if (!pfile && !freq) { fprintf(stderr, "read_markov_model error: specify pfile or freq\n"); exit(1); } /* add 'X 'to the alphabet if requested */ if (add_x) { char *tmp = NULL; Resize(tmp, alen+2, char); strcpy(tmp, alpha); tmp[alen] = 'X'; tmp[alen+1] = '\0'; alpha = tmp; alen++; } /* setup the mapping from ascii to integer and back */ setup_index(alpha); /* use the frequencies if given */ if (freq) { /* frequencies given */ Resize(a_cp, alen, double); for (i=0; i<alen-add_x; i++) RND(freq[i], 8, a_cp[i]); if (add_x) a_cp[i] = 1.0; /* Pr(X) */ /* averate reverse complement probabilities together if requested */ if (rc) average_rc(add_x, a_cp, 1, "", 0, alpha); return(a_cp); } /* initialize probability array */ for (i=0; i<MAX_BACK_SIZE; i++) a_p[i] = -1; /* read in the probabilities and save indexed by uppercase tuple name */ if (!(pfilep = fopen(pfile, "r"))) { fprintf(stderr, "Unable to open file %s for reading.\n", pfile); exit(1); } /*fprintf(stderr, "Reading background probabilities...\n");*/ while (1) { /* read file */ int len, index; line_no++; Getline(pfilep, line, len); /* read next line */ if (!line) break; /* at EOF */ if (line[0] == '#') continue; /* skip comments */ Split(line, fields, nfields); /* get tuple and prob */ if (nfields != 2) { fprintf(stderr, "Formatting error in file %s line %d: %s\n", pfile, line_no, line); exit(1); } tuple = fields[0]; p = atof(fields[1]); if (p<0 || p>1) { fprintf(stderr, "Illegal probability in file %s line %d: %s\n", pfile, line_no, line); } len = strlen(tuple); maxw = MAX(len, maxw); index = s2i(tuple); if (index < 0) { fprintf(stderr, "Illegal character in word `%s' in file %s line %d: %s\n", tuple, pfile, line_no, line); exit(1); } if (index >= MAX_BACK_SIZE) { for (i=1, ntuples=0; i<=maxw; i++) ntuples+= pow(alen, i); fprintf(stderr, "Background model too large. Use smaller model or increase \nMAX_BACK_SIZE to at least %d in background.h and recompile.\n", ntuples); exit(1); } a_p[index] = p; /* store probability */ } fclose(pfilep); /* check that all necessary probabilities are defined */ /*fprintf(stderr, "Checking that all necessary probabilities are defined...\n"); */ tuple = check_prob(add_x, a_p, maxw, "", 0, alpha); if (tuple) { fprintf(stderr, "File %s gives no probability for %s.\n", pfile, tuple); exit(1); } /* averate reverse complement probabilities together if requested */ if (rc) average_rc(add_x, a_p, maxw, "", 0, alpha); *order = maxw - 1; /* order of Markov model */ /* get conditional probabilities */ for (i=1, ntuples=0; i<=maxw; i++) ntuples+= pow(alen, i); a_cp = get_cond_prob(a_p, ntuples); /* print the probabilities */#ifdef DEBUG print_prob(a_cp, maxw, "", 0, alpha);#endif return(a_cp); /* return conditionals */} /* read_markov_model *//***************************************************************************//* check_prob Recursively check that all probabilities are defined for all tuples up to width n. Add their probabilities to all the X-containing tuples they match. Returns NULL if OK, otherwise the missing tuple. *//***************************************************************************/static char *check_prob( BOOLEAN add_x, /* add x-tuples if TRUE */ double *a_p, /* tuple->prob assoc. array */ int n, /* widest tuple */ char *tuple, /* call with "" */ int tuplew, /* tuple width; call with 0 */ char *alpha /* alphabet */) { int i; char *t = NULL, *x = NULL; /* tuple and x-tuple */ char *missing; /* missing tuple */ int ti; /* index of tuple */ if (n==0) return(NULL); /* everything is OK */ Resize(t, tuplew+2, char); Resize(x, tuplew+2, char); for(i=0; alpha[i+add_x]; i++) { /* ignore last letter (X) */ /* append letter to tuple */ strcpy(t, tuple); t[tuplew] = alpha[i]; t[tuplew+1] = '\0'; /* check that tuple exists */ ti = s2i(t); /* index of tuple */ if (a_p[ti] < 0) return(t); /* add the current tuple's probability to matching X-containing tuples */ if (add_x) add_x_tuples(a_p, t, tuplew+1, ti, x, 0, 0); /* recur */ missing = check_prob(add_x, a_p, n-1, t, tuplew+1, alpha); if (missing) return(missing); } /* letter */ myfree(t); if (add_x) myfree(x); return(NULL); /* all found */} /* check_prob *//***************************************************************************//* average_rc Recursively average the probabilities of reverse complement tuples. *//***************************************************************************/static void average_rc( BOOLEAN add_x, /* add x-tuples if TRUE */ double *a_p, /* tuple->prob assoc. array */
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?