首页 › 资源下载 › 生物技术 › ncbi源码 › 源码查看
blast_stat.c

来自「ncbi源码」· C语言代码 · 共 2,179 行 · 第 1/5 页
2,179 行
	 		residues[index] =             AMINOACID_TO_NCBISTDAA[toupper(STD_AMINO_ACID_FREQS[index].ch)];		}		else		{			residues[index] = STD_AMINO_ACID_FREQS[index].ch;		}	}	return index;}Int2Blast_ResFreqStdComp(const BlastScoreBlk* sbp, Blast_ResFreq* rfp){	Int2 retval;        Uint4 index;	Uint1* residues;	if (sbp->protein_alphabet == TRUE)	{		residues = (Uint1*) calloc(DIM(STD_AMINO_ACID_FREQS), sizeof(Uint1));		retval = Blast_GetStdAlphabet(sbp->alphabet_code, residues, DIM(STD_AMINO_ACID_FREQS));		if (retval < 1)			return retval;		for (index=0; index<DIM(STD_AMINO_ACID_FREQS); index++) 		{			rfp->prob[residues[index]] = STD_AMINO_ACID_FREQS[index].p;		}		sfree(residues);	}	else	{	/* beginning of blastna and ncbi2na are the same. */		/* Only blastna used  for nucleotides. */		for (index=0; index<DIM(nt_prob); index++) 		{			rfp->prob[index] = nt_prob[index].p;		}	}	Blast_ResFreqNormalize(sbp, rfp, 1.0);	return 0;}typedef struct Blast_ResComp {    Uint1	alphabet_code;    Int4*	comp; 	/* composition of alphabet, array starts at beginning of alphabet. */    Int4*   comp0;	/* Same array as above, starts at zero. */} Blast_ResComp;static Blast_ResComp*BlastResCompDestruct(Blast_ResComp* rcp){	if (rcp == NULL)		return NULL;	if (rcp->comp0 != NULL)		sfree(rcp->comp0);	sfree(rcp);	return NULL;}/* 	Allocated the Blast_ResComp* for a given alphabet.  Only the	alphabets ncbistdaa and ncbi4na should be used by BLAST.*/static Blast_ResComp*BlastResCompNew(BlastScoreBlk* sbp){	Blast_ResComp*	rcp;	rcp = (Blast_ResComp*) calloc(1, sizeof(Blast_ResComp));	if (rcp == NULL)		return NULL;	rcp->alphabet_code = sbp->alphabet_code;/* comp0 has zero offset, comp starts at 0, only one array is allocated.  */	rcp->comp0 = (Int4*) calloc(BLAST_MATRIX_SIZE, sizeof(Int4));	if (rcp->comp0 == NULL) 	{		rcp = BlastResCompDestruct(rcp);		return rcp;	}	rcp->comp = rcp->comp0 - sbp->alphabet_start;	return rcp;}/*	Store the composition of a (query) string.  */static Int2BlastResCompStr(BlastScoreBlk* sbp, Blast_ResComp* rcp, char* str, Int4 length){	char*	lp,* lpmax;	Int2 index;        Uint1 mask;	if (sbp == NULL || rcp == NULL || str == NULL)		return 1;	if (rcp->alphabet_code != sbp->alphabet_code)		return 1;        /* For megablast, check only the first 4 bits of the sequence values */        mask = (sbp->protein_alphabet ? 0xff : 0x0f);/* comp0 starts at zero and extends for "num", comp is the same array, but "start_at" offset. */	for (index=0; index<(sbp->alphabet_size); index++)		rcp->comp0[index] = 0;	for (lp = str, lpmax = lp+length; lp < lpmax; lp++)	{		++rcp->comp[(int)(*lp & mask)];	}	/* Don't count ambig. residues. */	for (index=0; index<sbp->ambig_occupy; index++)	{		rcp->comp[sbp->ambiguous_res[index]] = 0;	}	return 0;}static Int2Blast_ResFreqClr(const BlastScoreBlk* sbp, Blast_ResFreq* rfp){	Int2	alphabet_max, index; 	alphabet_max = sbp->alphabet_start + sbp->alphabet_size;	for (index=sbp->alphabet_start; index<alphabet_max; index++)                rfp->prob[index] = 0.0;         return 0;}/*	Calculate the residue frequencies associated with the provided ResComp*/static Int2Blast_ResFreqResComp(BlastScoreBlk* sbp, Blast_ResFreq* rfp,                      Blast_ResComp* rcp){	Int2	alphabet_max, index;	double	sum = 0.;	if (rfp == NULL || rcp == NULL)		return 1;	if (rfp->alphabet_code != rcp->alphabet_code)		return 1;	alphabet_max = sbp->alphabet_start + sbp->alphabet_size;	for (index=sbp->alphabet_start; index<alphabet_max; index++)		sum += rcp->comp[index];	if (sum == 0.) {		Blast_ResFreqClr(sbp, rfp);		return 0;	}	for (index=sbp->alphabet_start; index<alphabet_max; index++)		rfp->prob[index] = rcp->comp[index] / sum;	return 0;}static Int2Blast_ResFreqString(BlastScoreBlk* sbp, Blast_ResFreq* rfp, char* string, Int4 length){	Blast_ResComp* rcp;		rcp = BlastResCompNew(sbp);	BlastResCompStr(sbp, rcp, string, length);	Blast_ResFreqResComp(sbp, rfp, rcp);	rcp = BlastResCompDestruct(rcp);	return 0;}static Int2BlastScoreChk(Int4 lo, Int4 hi){	if (lo >= 0 || hi <= 0 ||			lo < BLAST_SCORE_1MIN || hi > BLAST_SCORE_1MAX)		return 1;	if (hi - lo > BLAST_SCORE_RANGE_MAX)		return 1;	return 0;}Blast_ScoreFreq*Blast_ScoreFreqNew(Int4 score_min, Int4 score_max){	Blast_ScoreFreq*	sfp;	Int4	range;	if (BlastScoreChk(score_min, score_max) != 0)		return NULL;	sfp = (Blast_ScoreFreq*) calloc(1, sizeof(Blast_ScoreFreq));	if (sfp == NULL)		return NULL;	range = score_max - score_min + 1;	sfp->sprob = (double*) calloc(range, sizeof(double));	if (sfp->sprob == NULL) 	{		Blast_ScoreFreqDestruct(sfp);		return NULL;	}	sfp->sprob0 = sfp->sprob;	sfp->sprob -= score_min;        /* center around 0 */	sfp->score_min = score_min;	sfp->score_max = score_max;	sfp->obs_min = sfp->obs_max = 0;	sfp->score_avg = 0.0;	return sfp;}static Int2BlastScoreFreqCalc(BlastScoreBlk* sbp, Blast_ScoreFreq* sfp, Blast_ResFreq* rfp1, Blast_ResFreq* rfp2){	Int4 **	matrix;	Int4	score, obs_min, obs_max;	double		score_sum, score_avg;	Int2 		alphabet_start, alphabet_end, index1, index2;	if (sbp == NULL || sfp == NULL)		return 1;	if (sbp->loscore < sfp->score_min || sbp->hiscore > sfp->score_max)		return 1;	for (score = sfp->score_min; score <= sfp->score_max; score++)		sfp->sprob[score] = 0.0;	matrix = sbp->matrix;	alphabet_start = sbp->alphabet_start;	alphabet_end = alphabet_start + sbp->alphabet_size;	for (index1=alphabet_start; index1<alphabet_end; index1++)	{		for (index2=alphabet_start; index2<alphabet_end; index2++)		{			score = matrix[index1][index2];			if (score >= sbp->loscore) 			{				sfp->sprob[score] += rfp1->prob[index1] * rfp2->prob[index2];			}		}	}	score_sum = 0.;	obs_min = obs_max = BLAST_SCORE_MIN;	for (score = sfp->score_min; score <= sfp->score_max; score++)	{		if (sfp->sprob[score] > 0.) 		{			score_sum += sfp->sprob[score];			obs_max = score;			if (obs_min == BLAST_SCORE_MIN)				obs_min = score;		}	}	sfp->obs_min = obs_min;	sfp->obs_max = obs_max;	score_avg = 0.0;	if (score_sum > 0.0001 || score_sum < -0.0001)	{		for (score = obs_min; score <= obs_max; score++) 		{			sfp->sprob[score] /= score_sum;			score_avg += score * sfp->sprob[score];		}	}	sfp->score_avg = score_avg;	return 0;}#define DIMOFP0	(iterlimit*range + 1)#define DIMOFP0_MAX (BLAST_KARLIN_K_ITER_MAX*BLAST_SCORE_RANGE_MAX+1)#define SMALL_LAMBDA_THRESHOLD 20 /*defines special case in K computation*/                                /*threshold is on exp(-Lambda)*//*The following procedure computes K. The input includes Lambda, H, *  and an array of probabilities for each score. *  There are distinct closed form for three cases: *  1. high score is 1 low score is -1 *  2. high score is 1 low score is not -1 *  3. low score is -1, high score is not 1 * * Otherwise, in most cases the value is computed as: * -exp(-2.0*outerSum) / ((H/lambda)*(exp(-lambda) - 1) * The last term (exp(-lambda) - 1) can be computed in two different * ways depending on whether lambda is small or not. * outerSum is a sum of the terms * innerSum/j, where j is denoted by iterCounter in the code. * The sum is truncated when the new term innersum/j i sufficiently small. * innerSum is a weighted sum of the probabilities of * of achieving a total score i in a gapless alignment, * which we denote by P(i,j). * of exactly j characters. innerSum(j) has two parts * Sum over i < 0  P(i,j)exp(-i * lambda) + * Sum over i >=0  P(i,j) * The terms P(i,j) are computed by dynamic programming. * An earlier version was flawed in that ignored the special case 1 * and tried to replace the tail of the computation of outerSum * by a geometric series, but the base of the geometric series * was not accurately estimated in some cases. */static doubleBlastKarlinLHtoK(Blast_ScoreFreq* sfp, double lambda, double H){    /*The next array stores the probabilities of getting each possible      score in an alignment of fixed length; the array is shifted      during part of the computation, so that      entry 0 is for score 0.  */    double         *alignmentScoreProbabilities = NULL;    Int4            low;    /* Lowest score (must be negative) */    Int4            high;   /* Highest score (must be positive) */    Int4            range;  /* range of scores, computed as high - low*/    double          K;      /* local copy of K  to return*/    int             i;   /*loop index*/    int             iterCounter; /*counter on iterations*/    Int4            divisor; /*candidate divisor of all scores with                               non-zero probabilities*/    /*highest and lowest possible alignment scores for current length*/    Int4            lowAlignmentScore, highAlignmentScore;    Int4            first, last; /*loop indices for dynamic program*/    register double innerSum;    double          oldsum, oldsum2;  /* values of innerSum on previous                                         iterations*/    double          outerSum;        /* holds sum over j of (innerSum                                        for iteration j/j)*/    double          score_avg; /*average score*/    /*first term to use in the closed form for the case where      high == 1 or low == -1, but not both*/    double          firstTermClosedForm;  /*usually store H/lambda*/    int             iterlimit; /*upper limit on iterations*/    double          sumlimit; /*lower limit on contributions                                to sum over scores*/    /*array of score probabilities reindexed so that low is at index 0*/    double         *probArrayStartLow;    /*pointers used in dynamic program*/    double         *ptrP, *ptr1, *ptr2, *ptr1e;    double          expMinusLambda; /*e^^(-Lambda) */    if (lambda <= 0. || H <= 0.) {        /* Theory dictates that H and lambda must be positive, so         * return -1 to indicate an error */        return -1.;    }    /*Karlin-Altschul theory works only if the expected score      is negative*/    if (sfp->score_avg >= 0.0) {        return -1.;    }    low   = sfp->obs_min;    high  = sfp->obs_max;    range = high - low;    probArrayStartLow = &sfp->sprob[low];    /* Look for the greatest common divisor ("delta" in Appendix of PNAS 87 of       Karlin&Altschul (1990) */    for (i = 1, divisor = -low; i <= range && divisor > 1; ++i) {        if (probArrayStartLow[i])            divisor = BLAST_Gcd(divisor, i);    }    high   /= divisor;    low    /= divisor;    lambda *= divisor;    range = high - low;    firstTermClosedForm = H/lambda;    expMinusLambda      = exp((double) -lambda);    if (low == -1 && high == 1) {        K = (sfp->sprob[low] - sfp->sprob[high]) *            (sfp->sprob[low] - sfp->sprob[high]) / sfp->sprob[low];        return(K);    }    if (low == -1 || high == 1) {        if (high == 1)            ;        else {            score_avg = sfp->score_avg / divisor;            firstTermClosedForm                = (score_avg * score_avg) / firstTermClosedForm;        }        return firstTermClosedForm * (1.0 - expMinusLambda);    }    sumlimit  = BLAST_KARLIN_K_SUMLIMIT_DEFAULT;    iterlimit = BLAST_KARLIN_K_ITER_MAX;    if (DIMOFP0 > DIMOFP0_MAX) {        return -1.;    }    alignmentScoreProbabilities =        (double *)calloc(DIMOFP0, sizeof(*alignmentScoreProbabilities));    if (alignmentScoreProbabilities == NULL)        return -1.;    outerSum = 0.;    lowAlignmentScore = highAlignmentScore = 0;    alignmentScoreProbabilities[0] = innerSum = oldsum = oldsum2 = 1.;
blast_stat.c - 源码说明

本页面展示了「ncbi源码」中的 blast_stat.c 源码文件，采用 C语言编程语言编写，共 2,179 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与ncbi相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?