blast_psi_priv.c
来自「ncbi源码」· C语言 代码 · 共 1,546 行 · 第 1/4 页
C
1,546 行
/* FIXME: change so that only lambda is calculated inside the loop that scales the matrix and kappa is calculated before returning from this function. Scaling factor should be optional argument to accomodate kappa.c's needs?*/intPSIScaleMatrix(const Uint1* query, /* [in] */ Uint4 query_length, /* [in] */ const double* std_probs, /* [in] */ double* scaling_factor, /* [in] */ PsiMatrix* score_matrix, /* [in|out] */ BlastScoreBlk* sbp) /* [in|out] */{ Boolean first_time = TRUE; Uint4 index = 0; /* loop index */ int** scaled_pssm = NULL; int** pssm = NULL; double factor; double factor_low; double factor_high; double new_lambda; /* Karlin-Altschul parameter */ const double kPositPercent = 0.05; const Uint4 kPositNumIterations = 10; Boolean too_high = TRUE; double ideal_lambda; if ( !score_matrix || !sbp || !query || !std_probs ) return PSIERR_BADPARAM; ASSERT(sbp->kbp_psi[0]); scaled_pssm = score_matrix->scaled_pssm; pssm = score_matrix->pssm; ideal_lambda = sbp->kbp_ideal->Lambda; /* FIXME: need to take scaling_factor into account */ factor = 1.0; while (1) { Uint4 i = 0; Uint4 j = 0; for (i = 0; i < score_matrix->ncols; i++) { for (j = 0; j < (Uint4) sbp->alphabet_size; j++) { if (scaled_pssm[i][j] != BLAST_SCORE_MIN) { pssm[i][j] = BLAST_Nint(factor*scaled_pssm[i][j]/kPsiScaleFactor); } else { pssm[i][j] = BLAST_SCORE_MIN; } } } if (scaling_factor) { double init_lambda_guess = sbp->kbp_psi[0]->Lambda / *scaling_factor; _PSIUpdateLambdaK((const int**)pssm, query, query_length, std_probs, &init_lambda_guess, sbp); } else { _PSIUpdateLambdaK((const int**)pssm, query, query_length, std_probs, NULL, sbp); } new_lambda = sbp->kbp_psi[0]->Lambda; if (new_lambda > ideal_lambda) { if (first_time) { factor_high = 1.0 + kPositPercent; factor = factor_high; too_high = TRUE; first_time = FALSE; } else { if (too_high == FALSE) { break; } factor_high += (factor_high - 1.0); factor = factor_high; } } else if (new_lambda > 0) { if (first_time) { factor_high = 1.0; factor_low = 1.0 - kPositPercent; factor = factor_low; too_high = FALSE; first_time = FALSE; } else { if (too_high == TRUE) { break; } factor_low += (factor_low - 1.0); factor = factor_low; } } else { return PSIERR_POSITIVEAVGSCORE; } } /* Binary search for kPositNumIterations times */ for (index = 0; index < kPositNumIterations; index++) { Uint4 i = 0; Uint4 j = 0; factor = (factor_high + factor_low)/2; for (i = 0; i < score_matrix->ncols; i++) { for (j = 0; j < (Uint4) sbp->alphabet_size; j++) { if (scaled_pssm[i][j] != BLAST_SCORE_MIN) { pssm[i][j] = BLAST_Nint(factor*scaled_pssm[i][j]/kPsiScaleFactor); } else { pssm[i][j] = BLAST_SCORE_MIN; } } } if (scaling_factor) { double init_lambda_guess = sbp->kbp_psi[0]->Lambda / *scaling_factor; _PSIUpdateLambdaK((const int**)pssm, query, query_length, std_probs, &init_lambda_guess, sbp); } else { _PSIUpdateLambdaK((const int**)pssm, query, query_length, std_probs, NULL, sbp); } new_lambda = sbp->kbp_psi[0]->Lambda; if (new_lambda > ideal_lambda) { factor_low = factor; } else { factor_high = factor; } } /* FIXME: Why is this needed? */ for (index = 0; index < (Uint4) sbp->alphabet_size; index++) { pssm[score_matrix->ncols-1][index] = BLAST_SCORE_MIN; } return PSI_SUCCESS;}Uint4_PSISequenceLengthWithoutX(const Uint1* seq, Uint4 length){ const Uint1 X = AMINOACID_TO_NCBISTDAA['X']; Uint4 retval = 0; /* the return value */ Uint4 i = 0; /* loop index */ ASSERT(seq); for(i = 0; i < length; i++) { if (seq[i] != X) { retval++; } } return retval;}Blast_ScoreFreq*_PSIComputeScoreProbabilities(const int** pssm, /* [in] */ const Uint1* query, /* [in] */ Uint4 query_length, /* [in] */ const double* std_probs, /* [in] */ const BlastScoreBlk* sbp) /* [in] */{ const Uint1 X = AMINOACID_TO_NCBISTDAA['X']; Uint1 aa_alphabet[BLASTAA_SIZE]; /* ncbistdaa alphabet */ Uint4 effective_length = 0; /* length of query w/o Xs */ Uint4 p = 0; /* index on positions */ Uint4 c = 0; /* index on characters */ int s = 0; /* index on scores */ int min_score = 0; /* minimum score in pssm */ int max_score = 0; /* maximum score in pssm */ short rv = 0; /* temporary return value */ Blast_ScoreFreq* score_freqs = NULL; /* score frequencies */ ASSERT(pssm); ASSERT(query); ASSERT(std_probs); ASSERT(sbp); ASSERT(sbp->alphabet_code == BLASTAA_SEQ_CODE); rv = Blast_GetStdAlphabet(sbp->alphabet_code, aa_alphabet, BLASTAA_SIZE); if (rv <= 0) { return NULL; } ASSERT(rv == sbp->alphabet_size); effective_length = _PSISequenceLengthWithoutX(query, query_length); /* Get the minimum and maximum scores */ for (p = 0; p < query_length; p++) { for (c = 0; c < (Uint4) sbp->alphabet_size; c++) { const int kScore = pssm[p][aa_alphabet[c]]; if (kScore <= BLAST_SCORE_MIN || kScore >= BLAST_SCORE_MAX) { continue; } max_score = MAX(kScore, max_score); min_score = MIN(kScore, min_score); } } if ( !(score_freqs = Blast_ScoreFreqNew(min_score, max_score))) return NULL; score_freqs->obs_min = min_score; score_freqs->obs_max = max_score; for (p = 0; p < query_length; p++) { if (query[p] == X) { continue; } for (c = 0; c < (Uint4) sbp->alphabet_size; c++) { const Uint4 kScore = pssm[p][aa_alphabet[c]]; if (kScore <= BLAST_SCORE_MIN || kScore >= BLAST_SCORE_MAX) { continue; } /* Increment the weight for the score in position p, residue c */ score_freqs->sprob[kScore] += (std_probs[aa_alphabet[c]]/effective_length); } } for (s = min_score; s < max_score; s++) { score_freqs->score_avg += (s * score_freqs->sprob[s]); } return score_freqs;}void_PSIUpdateLambdaK(const int** pssm, /* [in] */ const Uint1* query, /* [in] */ Uint4 query_length, /* [in] */ const double* std_probs, /* [in] */ double* initial_lambda_guess, /* [in] */ BlastScoreBlk* sbp) /* [in|out] */{ Blast_ScoreFreq* score_freqs = _PSIComputeScoreProbabilities(pssm, query, query_length, std_probs, sbp); if (initial_lambda_guess) { sbp->kbp_psi[0]->Lambda = Blast_KarlinLambdaNR(score_freqs, *initial_lambda_guess); } else { /* Calculate lambda and K */ Blast_KarlinBlkCalc(sbp->kbp_psi[0], score_freqs); /* Shouldn't this be in a function? */ sbp->kbp_gap_psi[0]->K = sbp->kbp_psi[0]->K * sbp->kbp_gap_std[0]->K / sbp->kbp_ideal->K; sbp->kbp_gap_psi[0]->logK = log(sbp->kbp_gap_psi[0]->K); } score_freqs = Blast_ScoreFreqDestruct(score_freqs);}/****************************************************************************//* Function definitions for auxiliary functions for the stages above */int_PSIPurgeAlignedRegion(PsiAlignmentData* alignment, unsigned int seq_index, unsigned int start, unsigned int stop){ PsiDesc* sequence_position = NULL; unsigned int i = 0; if (!alignment) return PSIERR_BADPARAM; /* Cannot remove the query sequence from multiple alignment data or bad index */ if (seq_index == kQueryIndex || seq_index > alignment->dimensions->num_seqs + 1 || stop > alignment->dimensions->query_sz) return PSIERR_BADPARAM; sequence_position = alignment->desc_matrix[seq_index]; fprintf(stderr, "NEW purge %d (%d-%d)\n", seq_index, start, stop); for (i = start; i < stop; i++) { sequence_position[i].letter = (unsigned char) -1; sequence_position[i].used = FALSE; sequence_position[i].e_value = kDefaultEvalueForPosition; sequence_position[i].extents.left = (unsigned int) -1; sequence_position[i].extents.right = alignment->dimensions->query_sz; } _PSIDiscardIfUnused(alignment, seq_index); return PSI_SUCCESS;}/* Check if we still need this sequence */void_PSIDiscardIfUnused(PsiAlignmentData* alignment, unsigned int seq_index){ Boolean contains_aligned_regions = FALSE; unsigned int i = 0; for (i = 0; i < alignment->dimensions->query_sz; i++) { if (alignment->desc_matrix[seq_index][i].used) { contains_aligned_regions = TRUE; break; } } if ( !contains_aligned_regions ) { alignment->use_sequences[seq_index] = FALSE; fprintf(stderr, "NEW Removing sequence %d\n", seq_index); }}/****************************************************************************/double*_PSIGetStandardProbabilities(const BlastScoreBlk* sbp){ Blast_ResFreq* standard_probabilities = NULL; Uint4 i = 0; double* retval = NULL; if ( !(retval = (double*) malloc(sbp->alphabet_size * sizeof(double)))) return NULL; standard_probabilities = Blast_ResFreqNew(sbp); Blast_ResFreqStdComp(sbp, standard_probabilities); for (i = 0; i < (Uint4) sbp->alphabet_size; i++) { retval[i] = standard_probabilities->prob[i]; } Blast_ResFreqDestruct(standard_probabilities); return retval;}PsiDiagnostics*_PSISaveDiagnostics(const PsiAlignmentData* alignment, const PsiAlignedBlock* aligned_block, const PsiSequenceWeights* seq_weights){ /* _PSICalculateInformationContent(seq_weights); */ abort(); return NULL;}/* * =========================================================================== * $Log: blast_psi_priv.c,v $ * Revision 1000.1 2004/06/01 18:07:34 gouriano * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.6 * * Revision 1.6 2004/05/28 17:35:03 camacho * Fix msvc6 warnings * * Revision 1.5 2004/05/28 16:00:09 camacho * + first port of PSSM generation engine * * Revision 1.4 2004/05/19 14:52:02 camacho * 1. Added doxygen tags to enable doxygen processing of algo/blast/core * 2. Standardized copyright, CVS $Id string, $Log and rcsid formatting and i * location * 3. Added use of @todo doxygen keyword * * Revision 1.3 2004/05/06 14:01:40 camacho * + _PSICopyMatrix * * Revision 1.2 2004/04/07 22:08:37 kans * needed to include blast_def.h for sfree prototype * * Revision 1.1 2004/04/07 19:11:17 camacho * Initial revision * * =========================================================================== */
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?