📄 blast_kappa.c
字号:
that in the structure that is returned for indexing convenience the field storing scoreArray points to the entry for score 0, so that referring to the -k index corresponds to score -k */static Blast_ScoreFreq* notposfillSfp(Int4 **matrix, double *subjectProbArray, double *queryProbArray, double *scoreArray, Blast_ScoreFreq* return_sfp, Int4 range){ Int4 minScore, maxScore; /*observed minimum and maximum scores*/ Int4 i,j,k; /* indices */ minScore = maxScore = 0; for(i = 0; i < BLASTAA_SIZE; i++) { for(j = 0 ; j < PRO_TRUE_ALPHABET_SIZE; j++) { k = trueCharPositions[j]; if ((matrix[i][k] != BLAST_SCORE_MIN) && (matrix[i][k] < minScore)) minScore = matrix[i][k]; if (matrix[i][k] > maxScore) maxScore = matrix[i][k]; } } return_sfp->obs_min = minScore; return_sfp->obs_max = maxScore; for (i = 0; i < range; i++) scoreArray[i] = 0.0; return_sfp->sprob = &(scoreArray[-minScore]); /*center around 0*/ for(i = 0; i < BLASTAA_SIZE; i++) { for (j = 0; j < PRO_TRUE_ALPHABET_SIZE; j++) { k = trueCharPositions[j]; if(matrix[i][k] >= minScore) { return_sfp->sprob[matrix[i][k]] += (queryProbArray[i] * subjectProbArray[k]); } } } return_sfp->score_avg = 0; for(i = minScore; i <= maxScore; i++) return_sfp->score_avg += i * return_sfp->sprob[i]; return(return_sfp);}/*matrix is a position-specific score matrix with matrixLength positions subjectProbArray is an array containing the probability of occurrence of each residue in the matching sequence often called the subject scoreArray is an array of probabilities for each score that is to be used as a field in return_sfp return_sfp is a the structure to be filled in and returned range is the size of scoreArray and is an upper bound on the difference between maximum score and minimum score in the matrix the routine posfillSfp computes the probability of each score weighted by the probability of each query residue and fills those probabilities into scoreArray and puts scoreArray as a field in that in the structure that is returned for indexing convenience the field storing scoreArray points to the entry for score 0, so that referring to the -k index corresponds to score -k */static Blast_ScoreFreq* posfillSfp(Int4 **matrix, Int4 matrixLength, double *subjectProbArray, double *scoreArray, Blast_ScoreFreq* return_sfp, Int4 range){ Int4 minScore, maxScore; /*observed minimum and maximum scores*/ Int4 i,j,k; /* indices */ double onePosFrac; /*1/matrix length as a double*/ minScore = maxScore = 0; for(i = 0; i < matrixLength; i++) { for(j = 0 ; j < PRO_TRUE_ALPHABET_SIZE; j++) { k = trueCharPositions[j]; if ((matrix[i][k] != BLAST_SCORE_MIN) && (matrix[i][k] < minScore)) minScore = matrix[i][k]; if (matrix[i][k] > maxScore) maxScore = matrix[i][k]; } } return_sfp->obs_min = minScore; return_sfp->obs_max = maxScore; for (i = 0; i < range; i++) scoreArray[i] = 0.0; return_sfp->sprob = &(scoreArray[-minScore]); /*center around 0*/ onePosFrac = 1.0/ ((double) matrixLength); for(i = 0; i < matrixLength; i++) { for (j = 0; j < PRO_TRUE_ALPHABET_SIZE; j++) { k = trueCharPositions[j]; if(matrix[i][k] >= minScore) { return_sfp->sprob[matrix[i][k]] += (onePosFrac * subjectProbArray[k]); } } } return_sfp->score_avg = 0; for(i = minScore; i <= maxScore; i++) return_sfp->score_avg += i * return_sfp->sprob[i]; return(return_sfp);}/*Given a sequence of 'length' amino acid residues, compute the probability of each residue and put that in the array resProb*/static void fillResidueProbability(Uint1* sequence, Int4 length, double * resProb){ Int4 frequency[BLASTAA_SIZE]; /*frequency of each letter*/ Int4 i; /*index*/ Int4 localLength; /*reduce for X characters*/ localLength = length; for(i = 0; i < BLASTAA_SIZE; i++) frequency[i] = 0; for(i = 0; i < length; i++) { if (XCHAR != sequence[i]) frequency[sequence[i]]++; else localLength--; } for(i = 0; i < BLASTAA_SIZE; i++) { if (frequency[i] == 0) resProb[i] = 0.0; else resProb[i] = ((double) (frequency[i])) /((double) localLength); }}#define posEpsilon 0.0001/** Return the a matrix of the frequency ratios that underlie the * score matrix being used on this pass. The returned matrix * is position-specific, so if we are in the first pass, use * query to convert the 20x20 standard matrix into a position-specific * variant. matrixName is the name of the underlying 20x20 * score matrix used. numPositions is the length of the query; * startNumerator is the matrix of frequency ratios as stored * in posit.h. It needs to be divided by the frequency of the * second character to get the intended ratio * @param sbp statistical information for blast [in] * @param query the query sequence [in] * @param matrixName name of the underlying matrix [in] * @param startNumerator matrix of frequency ratios as stored * in posit.h. It needs to be divided by the frequency of the * second character to get the intended ratio [in] * @param numPositions length of the query [in] */static double **getStartFreqRatios(BlastScoreBlk* sbp, Uint1* query, const char *matrixName, double **startNumerator, Int4 numPositions) { Blast_ResFreq* stdrfp; /* gets standard frequencies in prob field */ double** returnRatios; /*frequency ratios to start investigating each pair*/ double *standardProb; /*probabilities of each letter*/ Int4 i,j; /* Loop indices. */ SFreqRatios* freqRatios=NULL; /* frequency ratio container for given matrix */ returnRatios = allocateStartFreqs(numPositions); freqRatios = _PSIMatrixFrequencyRatiosNew(matrixName); ASSERT(freqRatios); if (freqRatios == NULL) return NULL; for(i = 0; i < numPositions; i++) { for(j = 0; j < BLASTAA_SIZE; j++) { returnRatios[i][j] = freqRatios->data[query[i]][j]; } } freqRatios = _PSIMatrixFrequencyRatiosFree(freqRatios);/* FIXME use blast_psi_priv.c:_PSIGetStandardProbabilities when available here. */ stdrfp = Blast_ResFreqNew(sbp); Blast_ResFreqStdComp(sbp,stdrfp); standardProb = calloc(1, BLASTAA_SIZE * sizeof(double)); for(i = 0; i < BLASTAA_SIZE; i++) standardProb[i] = stdrfp->prob[i]; /*reverse multiplication done in posit.c*/ for(i = 0; i < numPositions; i++) for(j = 0; j < BLASTAA_SIZE; j++) if ((standardProb[query[i]] > posEpsilon) && (standardProb[j] > posEpsilon) && (j != STARCHAR) && (j != XCHAR) && (startNumerator[i][j] > posEpsilon)) returnRatios[i][j] = startNumerator[i][j]/standardProb[j]; stdrfp = Blast_ResFreqDestruct(stdrfp); sfree(standardProb); return(returnRatios);}/** take every entry of startFreqRatios that is not corresponding to * a score of BLAST_SCORE_MIN and take its log, divide by Lambda and * multiply by LambdaRatio then round to the nearest integer and * put the result in the corresponding entry of matrix. * startMatrix and matrix have dimensions numPositions X BLASTAA_SIZE * @param matrix preallocated matrix to be filled in [out] * @param startMatrix matrix to be scaled up [in] * @param startFreqRatios frequency ratios of starting matrix [in] * @param numPositions length of query [in] * @param Lambda A Karlin-Altschul parameter. [in] * @param LambdaRatio ratio of correct Lambda to it's original value [in]*/static void scaleMatrix(Int4 **matrix, Int4 **startMatrix, double **startFreqRatios, Int4 numPositions, double Lambda, double LambdaRatio){ Int4 p, c; /*indices over positions and characters*/ double temp; /*intermediate term in computation*/ for (p = 0; p < numPositions; p++) { for (c = 0; c < BLASTAA_SIZE; c++) { if (matrix[p][c] == BLAST_SCORE_MIN) matrix[p][c] = startMatrix[p][c]; else { temp = log(startFreqRatios[p][c]); temp = temp/Lambda; temp = temp * LambdaRatio; matrix[p][c] = BLAST_Nint(temp); } } }}/*SCALING_FACTOR is a multiplicative factor used to get more bits of * precision in the integer matrix scores. It cannot be arbitrarily * large because we do not want total alignment scores to exceedto * -(BLAST_SCORE_MIN) */#define SCALING_FACTOR 32/** Compute a scaled up version of the standard matrix encoded by matrix name. * Standard matrices are in half-bit units. * @param matrix preallocated matrix [in][out] * @param matrixName name of matrix (e.g., BLOSUM62, PAM30). [in] * @param Lambda A Karlin-Altschul parameter. [in]*/static void computeScaledStandardMatrix(Int4 **matrix, char *matrixName, double Lambda){ int i,j; /*loop indices*/ double temp; /*intermediate term in computation*/ SFreqRatios* freqRatios=NULL; /* frequency ratio container for given matrix */ freqRatios = _PSIMatrixFrequencyRatiosNew(matrixName); ASSERT(freqRatios); if (freqRatios == NULL) return; for(i = 0; i < BLASTAA_SIZE; i++) for(j = 0; j < BLASTAA_SIZE; j++) { if(0.0 == freqRatios->data[i][j]) matrix[i][j] = BLAST_SCORE_MIN; else { temp = log(freqRatios->data[i][j])/Lambda; matrix[i][j] = BLAST_Nint(temp); } } freqRatios = _PSIMatrixFrequencyRatiosFree(freqRatios); return;}#if 0 /* FIXME *//************************************************************produce a scaled-up version of the position-specific matrix starting fromposFreqsfillPosMatrix is the matrix to be fillednonposMatrix is the underlying position-independent matrix, used tofill positions where frequencies are irrelevantsbp stores various parameters of the search*****************************************************************/void scalePosMatrix(Int4 **fillPosMatrix, Int4 **nonposMatrix, char *matrixName, double **posFreqs, Uint1 *query, Int4 queryLength, BLAST_ScoreBlk* sbp){ posSearchItems *posSearch; /*used to pass data into scaling routines*/ compactSearchItems *compactSearch; /*used to pass data into scaling routines*/ Int4 i,j ; /*loop indices*/ BLAST_ResFreq* stdrfp; /* gets standard frequencies in prob field */ Int4 a; /*index over characters*/ double **standardFreqRatios; /*frequency ratios for standard score matrix*/ Int4 multiplier; /*bit scale factor for scores*/ posSearch = (posSearchItems *) calloc (1, sizeof(posSearchItems)); compactSearch = (compactSearchItems *) calloc (1, sizeof(compactSearchItems)); posSearch->posMatrix = (Int4 **) calloc((queryLength + 1), sizeof(Int4 *)); posSearch->posPrivateMatrix = fillPosMatrix; posSearch->posFreqs = posFreqs; for(i = 0; i <= queryLength; i++) posSearch->posMatrix[i] = (Int4 *) calloc(BLASTAA_SIZE, sizeof(Int4)); compactSearch->query = (Uint1*) query; compactSearch->qlength = queryLength; compactSearch->alphabetSize = BLASTAA_SIZE; compactSearch->gapped_calculation = TRUE; compactSearch->matrix = nonposMatrix; compactSearch->lambda = sbp->kbp_gap_std[0]->Lambda; compactSearch->kbp_std = sbp->kbp_std; compactSearch->kbp_psi = sbp->kbp_psi; compactSearch->kbp_gap_psi = sbp->kbp_gap_psi; compactSearch->kbp_gap_std = sbp->kbp_gap_std; compactSearch->lambda_ideal = sbp->kbp_ideal->Lambda; compactSearch->K_ideal = sbp->kbp_ideal->K; stdrfp = BlastResFreqNew(sbp); BlastResFreqStdComp(sbp,stdrfp); compactSearch->standardProb = calloc(compactSearch->alphabetSize, sizeof(double)); for(a = 0; a < compactSearch->alphabetSize; a++) compactSearch->standardProb[a] = stdrfp->prob[a]; stdrfp = BlastResFreqDestruct(stdrfp); standardFreqRatios = (double **) calloc(BLASTAA_SIZE, sizeof(double *)); for (i = 0; i < BLASTAA_SIZE; i++) standardFreqRatios[i] = (double *) calloc(BLASTAA_SIZE, sizeof(double)); if ((0 == strcmp(matrixName,"BLOSUM62")) || (0 == strcmp(matrixName,"BLOSUM62_20"))) { multiplier = 2; for(i = 0; i < BLASTAA_SIZE; i++) for(j = 0; j < BLASTAA_SIZE; j++) standardFreqRatios[i][j] = BLOSUM62_FREQRATIOS[i][j]; } if (0 == strcmp(matrixName,"BLOSUM62_20A")) { multiplier = 2; for(i = 0; i < BLASTAA_SIZE; i++) for(j = 0; j < BLASTAA_SIZE; j++) standardFreqRatios[i][j] = 0.9666 * BLOSUM62_FREQRATIOS[i][j]; } if (0 == strcmp(matrixName,"BLOSUM62_20B")) { multiplier = 2; for(i = 0; i < BLASTAA_SIZE; i++) for(j = 0; j < BLASTAA_SIZE; j++) standardFreqRatios[i][j] = 0.9344 * BLOSUM62_FREQRATIOS[i][j]; } if (0 == strcmp(matrixName,"BLOSUM45")) { multiplier = 3; for(i = 0; i < BLASTAA_SIZE; i++) for(j = 0; j < BLASTAA_SIZE; j++) standardFreqRatios[i][j] = BLOSUM45_FREQRATIOS[i][j]; } if (0 == strcmp(matrixName,"BLOSUM80")) { multiplier = 2; for(i = 0; i < BLASTAA_SIZE; i++) for(j = 0; j < BLASTAA_SIZE; j++) standardFreqRatios[i][j] = BLOSUM80_FREQRATIOS[i][
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -