首页 › 资源下载 › 生物技术 › ncbi源码 › 源码查看
blast_stat.c

来自「ncbi源码」· C语言代码 · 共 2,179 行 · 第 1/5 页
2,179 行
    for (iterCounter = 0;         ((iterCounter < iterlimit) && (innerSum > sumlimit));         outerSum += innerSum /= ++iterCounter) {        first = last = range;        lowAlignmentScore  += low;        highAlignmentScore += high;        /*dynamic program to compute P(i,j)*/        for (ptrP = alignmentScoreProbabilities +                 (highAlignmentScore-lowAlignmentScore);             ptrP >= alignmentScoreProbabilities;             *ptrP-- =innerSum) {            ptr1  = ptrP - first;            ptr1e = ptrP - last;            ptr2  = probArrayStartLow + first;            for (innerSum = 0.; ptr1 >= ptr1e; )                innerSum += *ptr1--  *  *ptr2++;            if (first)                --first;            if (ptrP - alignmentScoreProbabilities <= range)                --last;        }        /* Horner's rule */        innerSum = *++ptrP;        for( i = lowAlignmentScore + 1; i < 0; i++ ) {            innerSum = *++ptrP + innerSum * expMinusLambda;        }        innerSum *= expMinusLambda;        for (; i <= highAlignmentScore; ++i)            innerSum += *++ptrP;        oldsum2 = oldsum;        oldsum  = innerSum;    }#ifdef ADD_GEOMETRIC_TERMS_TO_K    /*old code assumed that the later terms in sum were      asymptotically comparable to those of a geometric      progression, and tried to speed up convergence by      guessing the estimated ratio between sucessive terms      and using the explicit terms of a geometric progression      to speed up convergence. However, the assumption does not      always hold, and convergenece of the above code is fast      enough in practice*/    /* Terms of geometric progression added for correction */    {        double     ratio;  /* fraction used to generate the                                   geometric progression */        ratio = oldsum / oldsum2;        if (ratio >= (1.0 - sumlimit*0.001)) {            K = -1.;            if (alignmentScoreProbabilities != NULL)                sfree(alignmentScoreProbabilities);            return K;        }        sumlimit *= 0.01;        while (innerSum > sumlimit) {            oldsum   *= ratio;            outerSum += innerSum = oldsum / ++iterCounter;        }    }#endif    if (expMinusLambda <  SMALL_LAMBDA_THRESHOLD ) {        K = -exp((double)-2.0*outerSum) /            (firstTermClosedForm*(expMinusLambda - 1.0));    } else {        K = -exp((double)-2.0*outerSum) /            (firstTermClosedForm*BLAST_Expm1(-(double)lambda));    }    if (alignmentScoreProbabilities != NULL)        sfree(alignmentScoreProbabilities);    return K;}/** * Find positive solution to sum_{i=low}^{high} exp(i lambda) = 1. *  * @param probs probabilities of a score occurring  * @param d the gcd of the possible scores. This equals 1 if the scores * are not a lattice * @param low the lowest possible score * @param high the highest possible score * @param lambda0 an initial value for lambda * @param tolx the tolerance to which lambda must be computed * @param itmax the maximum number of times the function may be * evaluated * @param maxNewton the maximum permissible number of Newton * iteration. After that the computation will proceed by bisection. * @param itn a pointer to an integer that will receive the actually * number of iterations performed. * * Let phi(lambda) =  sum_{i=low}^{high} exp(i lambda) - 1. Then phi(lambda) * may be written * *     phi(lamdba) = exp(u lambda) p( exp(-lambda) ) * * where p(x) is a polynomial that has exactly two zeros, one at x = 1 * and one at y = exp(-lamdba). It is simpler, more numerically * efficient and stable to apply Newton's method to p(x) than to * phi(lambda). * * We define a safeguarded Newton iteration as follows. Let the * initial interval of uncertainty be [0,1]. If p'(x) >= 0, we bisect * the interval. Otherwise we try a Newton step. If the Newton iterate * lies in the current interval of uncertainty and it reduces the * value of | p(x) | by at least 10%, we accept the new * point. Otherwise, we bisect the current interval of uncertainty. * It is clear that this method converges to a zero of p(x).  Since * p'(x) > 0 in an interval containing x = 1, the method cannot * converge to x = 1 and therefore converges to the only other zero, * y. */static double NlmKarlinLambdaNR( double* probs, Int4 d, Int4 low, Int4 high, double lambda0, double tolx,									 Int4 itmax, Int4 maxNewton, Int4 * itn ) {  Int4 k;  double x0, x, a = 0, b = 1;  double f = 4;  /* Larger than any possible value of the poly in [0,1] */  Int4 isNewton = 0; /* we haven't yet taken a Newton step. */  assert( d > 0 );	x0 = exp( -lambda0 );  x = ( 0 < x0 && x0 < 1 ) ? x0 : .5;    for( k = 0; k < itmax; k++ ) { /* all iteration indices k */    Int4 i;    double g, fold = f;    Int4 wasNewton = isNewton; /* If true, then the previous step was a */                              /* Newton step */    isNewton  = 0;            /* Assume that this step is not */        /* Horner's rule for evaluating a polynomial and its derivative */    g = 0;    f = probs[low];    for( i = low + d; i < 0; i += d ) {      g = x * g + f;      f = f * x + probs[i];    }    g = x * g + f;    f = f * x + probs[0] - 1;    for( i = d; i <= high; i += d ) {      g = x * g + f;      f = f * x + probs[i];    }    /* End Horner's rule */    if( f > 0 ) {      a = x; /* move the left endpoint */    } else if( f < 0 ) {       b = x; /* move the right endpoint */    } else { /* f == 0 */      break; /* x is an exact solution */    }    if( b - a < 2 * a * ( 1 - b ) * tolx ) {      /* The midpoint of the interval converged */      x = (a + b) / 2; break;    }    if( k >= maxNewton ||        /* If convergence of Newton's method appears to be failing; or */				( wasNewton && fabs( f ) > .9 * fabs(fold) ) ||          /* if the previous iteration was a Newton step but didn't decrease          * f sufficiently; or */        g >= 0         /* if a Newton step will move us away from the desired solution */        ) { /* then */      /* bisect */      x = (a + b)/2;    } else {      /* try a Newton step */      double p = - f/g;      double y = x + p;      if( y <= a || y >= b ) { /* The proposed iterate is not in (a,b) */        x = (a + b)/2;      } else { /* The proposed iterate is in (a,b). Accept it. */        isNewton = 1;        x = y;        if( fabs( p ) < tolx * x * (1-x) ) break; /* Converged */      } /* else the proposed iterate is in (a,b) */    } /* else try a Newton step. */   } /* end for all iteration indices k */	*itn = k;   return -log(x)/d;}doubleBlast_KarlinLambdaNR(Blast_ScoreFreq* sfp, double initialLambdaGuess){	Int4	low;			/* Lowest score (must be negative)  */	Int4	high;			/* Highest score (must be positive) */	Int4		itn;	Int4	i, d;	double* 	sprob;	double	returnValue;	low = sfp->obs_min;	high = sfp->obs_max;	if (sfp->score_avg >= 0.) {	/* Expected score must be negative */		return -1.0;	}	if (BlastScoreChk(low, high) != 0) return -1.;		sprob = sfp->sprob;	/* Find greatest common divisor of all scores */	for (i = 1, d = -low; i <= high-low && d > 1; ++i) {		if (sprob[i+low] != 0) {			d = BLAST_Gcd(d, i);		}	}	returnValue =		NlmKarlinLambdaNR( sprob, d, low, high,                           initialLambdaGuess,                           BLAST_KARLIN_LAMBDA_ACCURACY_DEFAULT,						   20, 20 + BLAST_KARLIN_LAMBDA_ITER_DEFAULT, &itn );	return returnValue;}/*	BlastKarlinLtoH	Calculate H, the relative entropy of the p's and q's*/static double BlastKarlinLtoH(Blast_ScoreFreq* sfp, double	lambda){	Int4	score;	double	H, etonlam, sum, scale;	double *probs = sfp->sprob;	Int4 low   = sfp->obs_min,  high  = sfp->obs_max;	if (lambda < 0.) {		return -1.;	}	if (BlastScoreChk(low, high) != 0) return -1.;	etonlam = exp( - lambda );  sum = low * probs[low];  for( score = low + 1; score <= high; score++ ) {    sum = score * probs[score] + etonlam * sum;  }  scale = BLAST_Powi( etonlam, high );  if( scale > 0 ) {    H = lambda * sum/scale;  } else { /* Underflow of exp( -lambda * high ) */    H = lambda * exp( lambda * high + log(sum) );  }	return H;}/**************** Statistical Significance Parameter Subroutine ****************    Version 1.0     February 2, 1990    Version 1.2     July 6,     1990    Program by:     Stephen Altschul    Address:        National Center for Biotechnology Information                    National Library of Medicine                    National Institutes of Health                    Bethesda, MD  20894    Internet:       altschul@ncbi.nlm.nih.govSee:  Karlin, S. & Altschul, S.F. "Methods for Assessing the Statistical    Significance of Molecular Sequence Features by Using General Scoring    Schemes,"  Proc. Natl. Acad. Sci. USA 87 (1990), 2264-2268.    Computes the parameters lambda and K for use in calculating the    statistical significance of high-scoring segments or subalignments.    The scoring scheme must be integer valued.  A positive score must be    possible, but the expected (mean) score must be negative.    A program that calls this routine must provide the value of the lowest    possible score, the value of the greatest possible score, and a pointer    to an array of probabilities for the occurrence of all scores between    these two extreme scores.  For example, if score -2 occurs with    probability 0.7, score 0 occurs with probability 0.1, and score 3    occurs with probability 0.2, then the subroutine must be called with    low = -2, high = 3, and pr pointing to the array of values    { 0.7, 0.0, 0.1, 0.0, 0.0, 0.2 }.  The calling program must also provide    pointers to lambda and K; the subroutine will then calculate the values    of these two parameters.  In this example, lambda=0.330 and K=0.154.    The parameters lambda and K can be used as follows.  Suppose we are    given a length N random sequence of independent letters.  Associated    with each letter is a score, and the probabilities of the letters    determine the probability for each score.  Let S be the aggregate score    of the highest scoring contiguous segment of this sequence.  Then if N    is sufficiently large (greater than 100), the following bound on the    probability that S is greater than or equal to x applies:            P( S >= x )   <=   1 - exp [ - KN exp ( - lambda * x ) ].    In other words, the p-value for this segment can be written as    1-exp[-KN*exp(-lambda*S)].    This formula can be applied to pairwise sequence comparison by assigning    scores to pairs of letters (e.g. amino acids), and by replacing N in the    formula with N*M, where N and M are the lengths of the two sequences    being compared.    In addition, letting y = KN*exp(-lambda*S), the p-value for finding m    distinct segments all with score >= S is given by:                           2             m-1           -y            1 - [ 1 + y + y /2! + ... + y   /(m-1)! ] e    Notice that for m=1 this formula reduces to 1-exp(-y), which is the same    as the previous formula.*******************************************************************************/Int2Blast_KarlinBlkCalc(Blast_KarlinBlk* kbp, Blast_ScoreFreq* sfp){		if (kbp == NULL || sfp == NULL)		return 1;	/* Calculate the parameter Lambda */	kbp->Lambda = Blast_KarlinLambdaNR(sfp, BLAST_KARLIN_LAMBDA0_DEFAULT);	if (kbp->Lambda < 0.)		goto ErrExit;	/* Calculate H */	kbp->H = BlastKarlinLtoH(sfp, kbp->Lambda);	if (kbp->H < 0.)		goto ErrExit;	/* Calculate K and log(K) */	kbp->K = BlastKarlinLHtoK(sfp, kbp->Lambda, kbp->H);	if (kbp->K < 0.)		goto ErrExit;	kbp->logK = log(kbp->K);	/* Normal return */	return 0;ErrExit:	kbp->Lambda = kbp->H = kbp->K = -1.;#ifdef BLASTKAR_HUGE_VAL	kbp->logK = BLASTKAR_HUGE_VAL;#else	kbp->logK = 1.e30;#endif	return 1;}/*  Calculate the Karlin parameters.  This function should be called once  for each context, or frame translated.    The rfp and stdrfp are calculated for each context, this should be  fixed. */Int2BLAST_ScoreBlkFill(BlastScoreBlk* sbp, char* query, Int4 query_length, Int4 context_number){	Blast_ResFreq* rfp,* stdrfp;	Int2 retval=0;	rfp = Blast_ResFreqNew(sbp);	stdrfp = Blast_ResFreqNew(sbp);	Blast_ResFreqStdComp(sbp, stdrfp);	Blast_ResFreqString(sbp, rfp, query, query_length);	sbp->sfp[context_number] = Blast_ScoreFreqNew(sbp->loscore, sbp->hiscore);	BlastScoreFreqCalc(sbp, sbp->sfp[context_number], rfp, stdrfp);	sbp->kbp_std[context_number] = Blast_KarlinBlkCreate();	retval = Blast_KarlinBlkCalc(sbp->kbp_std[context_number], sbp->sfp[context_number]);	if (retval)	{		rfp = Blast_ResFreqDestruct(rfp);		stdrfp = Blast_ResFreqDestruct(stdrfp);		return retval;	}	sbp->kbp_psi[context_number] = Blast_KarlinBlkCreate();	retval = Blast_KarlinBlkCalc(sbp->kbp_psi[context_number], sbp->sfp[context_number]);	rfp = Blast_ResFreqDestruct(rfp);	stdrfp = Blast_ResFreqDestruct(stdrfp);	return retval;}/*	Calculates the standard Karlin parameters.  This is used	if the query is translated and the calculated (real) Karlin	parameters are bad, as they're calculated for non-coding regions.*/Blast_KarlinBlk*Blast_KarlinBlkIdealCalc(BlastScoreBlk* sbp){	Blast_KarlinBlk* kbp_ideal;	Blast_ResFreq* stdrfp;	Blast_ScoreFreq* sfp;	stdrfp = Blast_ResFreqNew(sbp);	Blast_ResFreqStdComp(sbp, stdrfp);	sfp = Blast_ScoreFreqNew(sbp->loscore, sbp->hiscore);	BlastScoreFreqCalc(sbp, sfp, stdrfp, stdrfp);	kbp_ideal = Blast_KarlinBlkCreate();	Blast_KarlinBlkCalc(kbp_ideal, sfp);	stdrfp = Blast_ResFreqDestruct(stdrfp);	sfp = Blast_ScoreFreqDestruct(sfp);	return kbp_ideal;}Int2Blast_KarlinBlkStandardCalc(BlastScoreBlk* sbp, Int4 context_start, Int4 context_end){	Blast_KarlinBlk* kbp_ideal,* kbp;	Int4 in
blast_stat.c - 源码说明

本页面展示了「ncbi源码」中的 blast_stat.c 源码文件，采用 C语言编程语言编写，共 2,179 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与ncbi相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?