likelihood.c
来自「EM算法的改进」· C语言 代码 · 共 341 行
C
341 行
/* * $Id: likelihood.c 1339 2006-09-21 19:46:28Z tbailey $ * * $Log$ * Revision 1.2 2005/10/25 19:06:39 nadya * rm old macro for Header, all info is taken care of by Id and Log. * * Revision 1.1.1.1 2005/07/29 00:21:43 nadya * Importing from meme-3.0.14, and adding configure/make * *//*#define DEBUG*//**********************************************************************//* Likelihood and entropy functions.*//**********************************************************************//* 7-14-00 tlb; use back for rentropy *//* 6-23-00 tlb; add Ev objective function */#ifdef lrt_standalone#define DEFINE_GLOBALS#endif#include "meme.h"/* constants*/ static int *len = NULL;/* functions*/extern int int_compare( const void *v1, const void *v2);#ifdef OBSOLETEstatic double log_non_overlap_comb( int m, /* length of sequence */ int n, /* number of segments */ int w /* width of segment */); #endif/**********************************************************************//* get_log_sig Calculate the statistical significance of the alignment given its score and the type of objective function in force. If N>0, returns log E-value. If N==0, returns log p-value.*//**********************************************************************/extern double get_log_sig( double score, /* score of alignment */ MTYPE mtype, /* type of model */ int w, /* width of motif */ double wN, /* weighted number of sites */ int N, /* number of sites */ BOOLEAN invcomp, /* inv. compl. strand, too */ BOOLEAN pal, /* motif is DNA palindrome */ DATASET *dataset /* the dataset */){ double log_pv; /* log of p-value */ double log_sig = 0; /* return value */ log_pv = log_qfast(w, -score); /* p-value of product of p-values */ if (N) { /* use E-value of alignment */ log_sig = log_pv + get_log_nalign(mtype, w, N, invcomp && !pal, dataset); } else { /* use p-value of alignment */ log_sig = log_pv; } return(log_sig);} /* get_log_sig *//**********************************************************************//* calc_entropy Calculate the entropy of each column and relative entropy per column of a motif. Stores results in model. model->rentropy model->rel model->logev*//**********************************************************************/extern void calc_entropy ( MODEL *model, /* the model */ DATASET *dataset /* the dataset */){ int i, j; double *rentropy = model->rentropy; /* IC of each column */ double ent = 0; /* entropy per column */ double rel = 0; /* relative entropy per col. */ int alength = dataset->alength; /* length of alphabet */ double *back = dataset->back; /* background model freqs */ THETA obs = model->obs; /* observed frequencies */ int w = model->w; /* width of motif */ int N = model->nsites_dis; /* number of sites */ double log_pop; /* log product of col p-value */ /* calculate the relative entropy of each column in motif */ model->llr = log_pop = 0; for (i=0; i<w; i++) { /* position */ double llr; /* log likelihood ratio of column */ rentropy[i] = 0.0; for (j=0; j<alength; j++) { /* alphabet letter */ double f = obs(i, j); /* motif freq */ double p = back[j]; /* background freq */ ent += f ? f * LOG2(f) : 0; /* entropy */ rel += p ? f * LOG2(p) : 0; /* relative entropy */ rentropy[i] += (f && p) ? f * LOG(f/p) : 0; } /* alphabet letter */ llr = N * rentropy[i]; /* log likelihood ratio */ RND(llr, RNDDIG, llr); /* round to RNDDIG places */ model->llr += llr; /* llr for model */ log_pop += get_llr_pv(llr, N, 1, LLR_RANGE, 1.0, alength, back); rentropy[i] /= LOG(2); } /* position in motif */ /* compute the log E-value of the motif */ model->logev = get_log_sig(-log_pop, model->mtype, w, N, N, model->invcomp, model->pal, dataset); rel = (ent - rel)/w; /* compute rel. entropy/col */ /* store results in model */ model->rel = rel;} /* calc_entropy *//**********************************************************************//* log_comb Compute logarithm of m choose n.*//**********************************************************************/ extern double log_comb( int m, int n) { int i; double x = 0; int big, little; if (m-n > n) { big = m-n; little = n; } else { big = n; little = m-n; } for (i=m; i>big; i--) x += log((double) i); for (i=2; i<=little; i++) x -= log((double) i); return x;}#ifdef OBSOLETE/**********************************************************************//* log_non_overlap_comb Compute the log of the (approximate) number of combinations of n non-overlapping segments of width w chosen from a sequence of length m.*//**********************************************************************/static double log_non_overlap_comb( int m, /* length of sequence */ int n, /* number of segments */ int w /* width of segment */) { int i; double x = 0; if (m <= w*n) return BIG; /* too many segments */ for (i=1; i<=n; i++) { x += log(m-(w*i)+1.0) - log(i); /* each segment w covered */ } return x;} /* log_non_overlap_comb */#endif/**********************************************************************//* get_log_nalign Get an upper bound on the number of independent alignments of segments of length w.*//**********************************************************************/extern double get_log_nalign( MTYPE mtype, /* type of model */ int w, /* width of motif */ int N, /* number of occurrences */ BOOLEAN invcomp, /* inv. compl. seq allowed */ DATASET *dataset /* the dataset */){ int i, t; int nseqs = dataset->n_samples; /* number of sequences */ double log_nalign = 0; /* log number alignments */ int icfactor = invcomp ? 2 : 1; /* double the possible sites */ /* sort the sequence lengths in decreasing order in array len[] first time thru */ if (len == NULL) { /* first time through */ Resize(len, nseqs, int); for (i=0; i< nseqs; i++) len[i] = dataset->samples[i]->length; qsort((char *) len, nseqs, sizeof(int), int_compare); } /* get upper bound on number of alignments */ switch (mtype) { case Oops: case Zoops: if (w > len[N-1]) { /* impossible w */ log_nalign = BIG; } else { for (i=0; i<N; i++) log_nalign += log(icfactor*(len[i]-w+1.0)); if (N < nseqs) log_nalign += log_comb(nseqs, N); } break; case Tcm: for (i=t=0; i<nseqs; i++) t += len[i] - w + 1; /* # starts */ if (N > t) { /* impossible w & N */ log_nalign = BIG; } else { /* remove 1 site per site */ for (i=0; i<N; i++) log_nalign += log((t-i)*icfactor/(i+1)); } break; } /* mtype */ return log_nalign;} /* double get_log_nalign *//**********************************************************************//* log_qfast Calculate the log p-value of the log of the product of uniform [0,1] random variables.*//**********************************************************************/extern double log_qfast( int n, /* number of random variables in product */ double logk /* product of random variables */){ int i; double term, phi; if (n == 0) return 0; /* worst possible log p-value */ phi = term = 1; for (i=1; i<n; i++) { term *= -logk/i; phi += term; } return(logk + log(phi));} /* qfast *//**********************************************************************//* int_compare Compare two integers. Return <0 0 >0 if the second int is <, =, > the first int. For sorting with qsort in decreasing order.*//**********************************************************************/extern int int_compare( const void *v1, const void *v2){ const int * s1 = (const int *) v1; const int * s2 = (const int *) v2; return(*s2 - *s1);} /* int_compare */#ifdef obsolete/**********************************************************************//* get_n_unique_multisets Get the (log) number of unique multisets of size n containing items of (up to) m different types.*//**********************************************************************/static double get_n_unique_multisets ( int n, /* size of multisets */ int m /* number of item types */){ int i, j, k; double **f; /* dynamic programming array */ create_2array(f, double, n+1, m+1) /* initialize */ for (i=0; i<=n; i++) { for (j=0; j<=m; j++) { if (i==0 || j==1) { f[i][j] = 1; } else if (i==1) { f[i][j] = j; } else { f[i][j] = 0; } } } /* fill in array */ for (j=2; j<=m; j++) for (i=2; i<=n; i++) for (k=0; k<=i; k++) f[i][j] += f[k][j-1]; fprintf(stderr, "unique multisets (%d %d) %g\n", n, m, f[n][m]); free_2array(f, n); return f[n][m];} /* get_n_unique_multisets */#endif
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?