⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 blast_kappa.c

📁 ncbi源码
💻 C
📖 第 1 页 / 共 5 页
字号:
   that in the structure that is returned   for indexing convenience the field storing scoreArray points to the   entry for score 0, so that referring to the -k index corresponds to   score -k */static Blast_ScoreFreq* notposfillSfp(Int4 **matrix, double *subjectProbArray,  double *queryProbArray, double *scoreArray,  Blast_ScoreFreq* return_sfp, Int4 range){  Int4 minScore, maxScore; /*observed minimum and maximum scores*/  Int4 i,j,k; /* indices */  minScore = maxScore = 0;  for(i = 0; i < BLASTAA_SIZE; i++) {    for(j = 0 ; j < PRO_TRUE_ALPHABET_SIZE; j++) {      k = trueCharPositions[j];      if ((matrix[i][k] != BLAST_SCORE_MIN) && (matrix[i][k] < minScore))	minScore = matrix[i][k];      if (matrix[i][k] > maxScore)        maxScore = matrix[i][k];    }  }  return_sfp->obs_min = minScore;  return_sfp->obs_max = maxScore;  for (i = 0; i < range; i++)    scoreArray[i] = 0.0;  return_sfp->sprob = &(scoreArray[-minScore]); /*center around 0*/  for(i = 0; i < BLASTAA_SIZE; i++) {    for (j = 0; j < PRO_TRUE_ALPHABET_SIZE; j++) {      k = trueCharPositions[j];      if(matrix[i][k] >= minScore) {        return_sfp->sprob[matrix[i][k]] += (queryProbArray[i] * subjectProbArray[k]);      }    }  }  return_sfp->score_avg = 0;  for(i = minScore; i <= maxScore; i++)    return_sfp->score_avg += i * return_sfp->sprob[i];  return(return_sfp);}/*matrix is a position-specific score matrix with matrixLength positions  subjectProbArray is an array containing the probability of occurrence  of each residue in the matching sequence often called the subject  scoreArray is an array of probabilities for each score that is    to be used as a field in return_sfp  return_sfp is a the structure to be filled in and returned  range is the size of scoreArray and is an upper bound on the   difference between maximum score and minimum score in the matrix  the routine posfillSfp computes the probability of each score weighted   by the probability of each query residue and fills those probabilities   into scoreArray and puts scoreArray as a field in   that in the structure that is returned   for indexing convenience the field storing scoreArray points to the   entry for score 0, so that referring to the -k index corresponds to   score -k */static Blast_ScoreFreq* posfillSfp(Int4 **matrix, Int4 matrixLength, double *subjectProbArray, double *scoreArray,  Blast_ScoreFreq* return_sfp, Int4 range){  Int4 minScore, maxScore; /*observed minimum and maximum scores*/  Int4 i,j,k; /* indices */  double onePosFrac; /*1/matrix length as a double*/  minScore = maxScore = 0;  for(i = 0; i < matrixLength; i++) {    for(j = 0 ; j < PRO_TRUE_ALPHABET_SIZE; j++) {      k = trueCharPositions[j];      if ((matrix[i][k] != BLAST_SCORE_MIN) && (matrix[i][k] < minScore))	minScore = matrix[i][k];      if (matrix[i][k] > maxScore)        maxScore = matrix[i][k];    }  }  return_sfp->obs_min = minScore;  return_sfp->obs_max = maxScore;  for (i = 0; i < range; i++)    scoreArray[i] = 0.0;  return_sfp->sprob = &(scoreArray[-minScore]); /*center around 0*/  onePosFrac = 1.0/ ((double) matrixLength);  for(i = 0; i < matrixLength; i++) {    for (j = 0; j < PRO_TRUE_ALPHABET_SIZE; j++) {      k = trueCharPositions[j];      if(matrix[i][k] >= minScore) {        return_sfp->sprob[matrix[i][k]] += (onePosFrac * subjectProbArray[k]);      }    }  }  return_sfp->score_avg = 0;  for(i = minScore; i <= maxScore; i++)    return_sfp->score_avg += i * return_sfp->sprob[i];  return(return_sfp);}/*Given a sequence of 'length' amino acid residues, compute the  probability of each residue and put that in the array resProb*/static void fillResidueProbability(Uint1* sequence, Int4 length, double * resProb){  Int4 frequency[BLASTAA_SIZE]; /*frequency of each letter*/  Int4 i; /*index*/  Int4 localLength; /*reduce for X characters*/  localLength = length;  for(i = 0; i < BLASTAA_SIZE; i++)    frequency[i] = 0;  for(i = 0; i < length; i++) {    if (XCHAR != sequence[i])      frequency[sequence[i]]++;    else      localLength--;  }  for(i = 0; i < BLASTAA_SIZE; i++) {    if (frequency[i] == 0)      resProb[i] = 0.0;    else      resProb[i] = ((double) (frequency[i])) /((double) localLength);  }}#define posEpsilon 0.0001/** Return the a matrix of the frequency ratios that underlie the * score matrix being used on this pass. The returned matrix * is position-specific, so if we are in the first pass, use * query to convert the 20x20 standard matrix into a position-specific * variant. matrixName is the name of the underlying 20x20 * score matrix used. numPositions is the length of the query; * startNumerator is the matrix of frequency ratios as stored * in posit.h. It needs to be divided by the frequency of the * second character to get the intended ratio  * @param sbp statistical information for blast [in] * @param query the query sequence [in] * @param matrixName name of the underlying matrix [in] * @param startNumerator matrix of frequency ratios as stored *      in posit.h. It needs to be divided by the frequency of the *      second character to get the intended ratio [in] * @param numPositions length of the query [in] */static double **getStartFreqRatios(BlastScoreBlk* sbp,					Uint1* query,					const char *matrixName, 					double **startNumerator,					Int4 numPositions) {   Blast_ResFreq* stdrfp; /* gets standard frequencies in prob field */   double** returnRatios; /*frequency ratios to start investigating each pair*/   double *standardProb; /*probabilities of each letter*/   Int4 i,j;  /* Loop indices. */   SFreqRatios* freqRatios=NULL; /* frequency ratio container for given matrix */   returnRatios = allocateStartFreqs(numPositions);   freqRatios = _PSIMatrixFrequencyRatiosNew(matrixName);   ASSERT(freqRatios);   if (freqRatios == NULL)	return NULL;   for(i = 0; i < numPositions; i++) {     for(j = 0; j < BLASTAA_SIZE; j++) {	   returnRatios[i][j] = freqRatios->data[query[i]][j];     }   }   freqRatios = _PSIMatrixFrequencyRatiosFree(freqRatios);/* FIXME use blast_psi_priv.c:_PSIGetStandardProbabilities when available here. */   stdrfp = Blast_ResFreqNew(sbp);   Blast_ResFreqStdComp(sbp,stdrfp);    standardProb = calloc(1, BLASTAA_SIZE * sizeof(double));   for(i = 0; i < BLASTAA_SIZE; i++)       standardProb[i] = stdrfp->prob[i];     /*reverse multiplication done in posit.c*/   for(i = 0; i < numPositions; i++)      for(j = 0; j < BLASTAA_SIZE; j++)        if ((standardProb[query[i]] > posEpsilon) && (standardProb[j] > posEpsilon) &&     	     (j != STARCHAR) && (j != XCHAR)	     && (startNumerator[i][j] > posEpsilon))	   returnRatios[i][j] = startNumerator[i][j]/standardProb[j];   stdrfp = Blast_ResFreqDestruct(stdrfp);   sfree(standardProb);   return(returnRatios);}/** take every entry of startFreqRatios that is not corresponding to * a score of BLAST_SCORE_MIN and take its log, divide by Lambda and * multiply  by LambdaRatio then round to the nearest integer and * put the result in the corresponding entry of matrix. * startMatrix and matrix have dimensions numPositions X BLASTAA_SIZE * @param matrix preallocated matrix to be filled in [out] * @param startMatrix matrix to be scaled up [in] * @param startFreqRatios frequency ratios of starting matrix [in] * @param numPositions length of query [in] * @param Lambda A Karlin-Altschul parameter. [in] * @param LambdaRatio ratio of correct Lambda to it's original value [in]*/static void scaleMatrix(Int4 **matrix, Int4 **startMatrix, 			double **startFreqRatios, Int4 numPositions, 			double Lambda, double LambdaRatio){   Int4 p, c; /*indices over positions and characters*/   double temp; /*intermediate term in computation*/   for (p = 0; p < numPositions; p++) {     for (c = 0; c < BLASTAA_SIZE; c++) {       if (matrix[p][c] == BLAST_SCORE_MIN)	 matrix[p][c] = startMatrix[p][c];       else {         temp = log(startFreqRatios[p][c]);         temp = temp/Lambda;	 temp = temp * LambdaRatio; 	 matrix[p][c] = BLAST_Nint(temp);       }     }   }}/*SCALING_FACTOR is a multiplicative factor used to get more bits of * precision in the integer matrix scores. It cannot be arbitrarily * large because we do not want total alignment scores to exceedto * -(BLAST_SCORE_MIN) */#define SCALING_FACTOR 32/** Compute a scaled up version of the standard matrix encoded by matrix name.  * Standard matrices are in half-bit units. * @param matrix preallocated matrix [in][out] * @param matrixName name of matrix (e.g., BLOSUM62, PAM30). [in] * @param Lambda A Karlin-Altschul parameter. [in]*/static void  computeScaledStandardMatrix(Int4 **matrix, char *matrixName, double Lambda){   int i,j; /*loop indices*/   double temp; /*intermediate term in computation*/   SFreqRatios* freqRatios=NULL; /* frequency ratio container for given matrix */   freqRatios = _PSIMatrixFrequencyRatiosNew(matrixName);   ASSERT(freqRatios);   if (freqRatios == NULL)	return;   for(i = 0; i < BLASTAA_SIZE; i++)     for(j = 0; j < BLASTAA_SIZE; j++) {         if(0.0 == freqRatios->data[i][j])	   matrix[i][j] = BLAST_SCORE_MIN;	 else {	   temp = log(freqRatios->data[i][j])/Lambda;           matrix[i][j] = BLAST_Nint(temp);     }   }   freqRatios = _PSIMatrixFrequencyRatiosFree(freqRatios);   return;}#if 0 /* FIXME *//************************************************************produce a scaled-up version of the position-specific matrix starting fromposFreqsfillPosMatrix is the matrix to be fillednonposMatrix is the underlying position-independent matrix, used tofill positions where frequencies are irrelevantsbp stores various parameters of the search*****************************************************************/void scalePosMatrix(Int4 **fillPosMatrix, Int4 **nonposMatrix, char *matrixName, double **posFreqs, Uint1 *query, Int4 queryLength, BLAST_ScoreBlk* sbp){     posSearchItems *posSearch; /*used to pass data into scaling routines*/     compactSearchItems *compactSearch; /*used to pass data into scaling routines*/     Int4 i,j ; /*loop indices*/        BLAST_ResFreq* stdrfp; /* gets standard frequencies in prob field */     Int4 a; /*index over characters*/     double **standardFreqRatios; /*frequency ratios for standard score matrix*/     Int4 multiplier; /*bit scale factor for scores*/     posSearch = (posSearchItems *) calloc (1, sizeof(posSearchItems));     compactSearch = (compactSearchItems *) calloc (1, sizeof(compactSearchItems));     posSearch->posMatrix = (Int4 **) calloc((queryLength + 1), sizeof(Int4 *));     posSearch->posPrivateMatrix = fillPosMatrix;     posSearch->posFreqs = posFreqs;     for(i = 0; i <= queryLength; i++)        posSearch->posMatrix[i] = (Int4 *) calloc(BLASTAA_SIZE, sizeof(Int4));     compactSearch->query = (Uint1*) query;     compactSearch->qlength = queryLength;     compactSearch->alphabetSize = BLASTAA_SIZE;     compactSearch->gapped_calculation = TRUE;     compactSearch->matrix = nonposMatrix;     compactSearch->lambda =  sbp->kbp_gap_std[0]->Lambda;     compactSearch->kbp_std = sbp->kbp_std;     compactSearch->kbp_psi = sbp->kbp_psi;     compactSearch->kbp_gap_psi = sbp->kbp_gap_psi;     compactSearch->kbp_gap_std = sbp->kbp_gap_std;     compactSearch->lambda_ideal = sbp->kbp_ideal->Lambda;     compactSearch->K_ideal = sbp->kbp_ideal->K;     stdrfp = BlastResFreqNew(sbp);     BlastResFreqStdComp(sbp,stdrfp);      compactSearch->standardProb = calloc(compactSearch->alphabetSize, sizeof(double));     for(a = 0; a < compactSearch->alphabetSize; a++)       compactSearch->standardProb[a] = stdrfp->prob[a];     stdrfp = BlastResFreqDestruct(stdrfp);     standardFreqRatios = (double **) calloc(BLASTAA_SIZE, sizeof(double *));     for (i = 0; i < BLASTAA_SIZE; i++)       standardFreqRatios[i] = (double *) calloc(BLASTAA_SIZE, sizeof(double));     if ((0 == strcmp(matrixName,"BLOSUM62")) ||	 (0 == strcmp(matrixName,"BLOSUM62_20"))) {       multiplier = 2;       for(i = 0; i < BLASTAA_SIZE; i++)	 for(j = 0; j < BLASTAA_SIZE; j++)	   standardFreqRatios[i][j] = BLOSUM62_FREQRATIOS[i][j];     }     if (0 == strcmp(matrixName,"BLOSUM62_20A")) {       multiplier = 2;       for(i = 0; i < BLASTAA_SIZE; i++)	 for(j = 0; j < BLASTAA_SIZE; j++)	   standardFreqRatios[i][j] = 0.9666 * BLOSUM62_FREQRATIOS[i][j];     }     if (0 == strcmp(matrixName,"BLOSUM62_20B")) {       multiplier = 2;       for(i = 0; i < BLASTAA_SIZE; i++)	 for(j = 0; j < BLASTAA_SIZE; j++)	   standardFreqRatios[i][j] = 0.9344 * BLOSUM62_FREQRATIOS[i][j];     }     if (0 == strcmp(matrixName,"BLOSUM45")) {       multiplier = 3;       for(i = 0; i < BLASTAA_SIZE; i++)	 for(j = 0; j < BLASTAA_SIZE; j++)	   standardFreqRatios[i][j] = BLOSUM45_FREQRATIOS[i][j];     }     if (0 == strcmp(matrixName,"BLOSUM80")) {       multiplier = 2;       for(i = 0; i < BLASTAA_SIZE; i++)	 for(j = 0; j < BLASTAA_SIZE; j++)	   standardFreqRatios[i][j] = BLOSUM80_FREQRATIOS[i][

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -