conservation_colorer.cpp
来自「ncbi源码」· C++ 代码 · 共 641 行 · 第 1/2 页
CPP
641 行
/* * =========================================================================== * PRODUCTION $Log: conservation_colorer.cpp,v $ * PRODUCTION Revision 1000.3 2004/06/01 18:28:29 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.35 * PRODUCTION * =========================================================================== *//* $Id: conservation_colorer.cpp,v 1000.3 2004/06/01 18:28:29 gouriano Exp $* ===========================================================================** PUBLIC DOMAIN NOTICE* National Center for Biotechnology Information** This software/database is a "United States Government Work" under the* terms of the United States Copyright Act. It was written as part of* the author's official duties as a United States Government employee and* thus cannot be copyrighted. This software/database is freely available* to the public for use. The National Library of Medicine and the U.S.* Government have not placed any restriction on its use or reproduction.** Although all reasonable efforts have been taken to ensure the accuracy* and reliability of the software and data, the NLM and the U.S.* Government do not and cannot warrant the performance or results that* may be obtained by using this software or data. The NLM and the U.S.* Government disclaim all warranties, express or implied, including* warranties of performance, merchantability or fitness for any particular* purpose.** Please cite the author in any work or product based on this material.** ===========================================================================** Authors: Paul Thiessen** File Description:* Classes to color alignment blocks by sequence conservation** ===========================================================================*/#ifdef _MSC_VER#pragma warning(disable:4018) // disable signed/unsigned mismatch warning in MSVC#endif#include <ncbi_pch.hpp>#include <corelib/ncbistd.hpp> // must come before C-toolkit stuff#include <corelib/ncbi_limits.h>#include <util/tables/raw_scoremat.h>#include "block_multiple_alignment.hpp"#include "conservation_colorer.hpp"#include "cn3d_tools.hpp"#include "cn3d_blast.hpp"#include <blastkar.h> // for BLAST standard probability routines#include <objseq.h>#include <math.h>USING_NCBI_SCOPE;BEGIN_SCOPE(Cn3D)static inline char ScreenResidueCharacter(char original){ char ch = toupper(original); switch (ch) { case 'A': case 'R': case 'N': case 'D': case 'C': case 'Q': case 'E': case 'G': case 'H': case 'I': case 'L': case 'K': case 'M': case 'F': case 'P': case 'S': case 'T': case 'W': case 'Y': case 'V': case 'B': case 'Z': break; default: ch = 'X'; // make all but natural aa's just 'X' } return ch;}int GetBLOSUM62Score(char a, char b){ static SNCBIFullScoreMatrix Blosum62Matrix; static bool unpacked = false; if (!unpacked) { NCBISM_Unpack(&NCBISM_Blosum62, &Blosum62Matrix); unpacked = true; } return Blosum62Matrix.s[ScreenResidueCharacter(a)][ScreenResidueCharacter(b)];}static int GetPSSMScore(const BLAST_Matrix *pssm, char ch, int masterIndex){ if (!pssm || masterIndex < 0 || masterIndex >= pssm->rows) { ERRORMSG("GetPSSMScore() - invalid parameters"); return 0; } return pssm->matrix[masterIndex][LookupBLASTResidueNumberFromCharacter(ch)];}static map < char, float > StandardProbabilities;ConservationColorer::ConservationColorer(const BlockMultipleAlignment *parent) : nColumns(0), basicColorsCurrent(false), fitColorsCurrent(false), alignment(parent){ if (StandardProbabilities.size() == 0) { // initialize static stuff static const char ncbistdaa2char[26] = { 'X', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'X', 'Y', 'Z', 'X', 'X' }; // calculate expected residue frequencies (standard probabilities) // (code borrowed from SeqAlignInform() in cddutil.c) BLAST_ScoreBlkPtr sbp = BLAST_ScoreBlkNew(Seq_code_ncbistdaa, 1); BLAST_ResFreqPtr stdrfp = NULL; int a; if (!sbp) goto on_error; sbp->read_in_matrix = TRUE; sbp->protein_alphabet = TRUE; sbp->posMatrix = NULL; sbp->number_of_contexts = 1; Nlm_Char BLOSUM62[20]; Nlm_StrCpy(BLOSUM62, "BLOSUM62"); // test C-toolkit data directory registry - which BlastScoreBlkMatFill() uses Nlm_Char dataPath[255]; if (Nlm_FindPath("ncbi", "ncbi", "data", dataPath, 255)) TRACEMSG("FindPath() returned '" << dataPath << "'"); else WARNINGMSG("FindPath() failed!"); if (BlastScoreBlkMatFill(sbp, BLOSUM62) != 0) goto on_error; stdrfp = BlastResFreqNew(sbp); if (!stdrfp) goto on_error; if (BlastResFreqStdComp(sbp, stdrfp) != 0) goto on_error; for (a=1; a<23; ++a) // from 'A' to 'Y' StandardProbabilities[ncbistdaa2char[a]] = (float) stdrfp->prob[a];// for (a=1; a<23; ++a)// TESTMSG("std prob '" << ncbistdaa2char[a] << "' = "// << setprecision(10) << StandardProbabilities[ncbistdaa2char[a]]); goto cleanup;on_error: ERRORMSG("ConservationColorer::ConservationColorer() - " "error initializing standard residue probabilities");cleanup: if (stdrfp) BlastResFreqDestruct(stdrfp); if (sbp) BLAST_ScoreBlkDestruct(sbp); }}void ConservationColorer::AddBlock(const UngappedAlignedBlock *block){ // sanity check if (!block->IsFrom(alignment)) { ERRORMSG("ConservationColorer::AddBlock : block is not from the associated alignment"); return; } blocks[block].resize(block->width); // map block column position to profile position for (int i=0; i<block->width; ++i) blocks[block][i] = nColumns + i; nColumns += block->width; basicColorsCurrent = fitColorsCurrent = false;}void ConservationColorer::CalculateBasicConservationColors(void){ if (basicColorsCurrent || blocks.size() == 0) return; TRACEMSG("calculating basic conservation colors"); int nRows = alignment->NRows(); ColumnProfile::iterator p, pe, p2; int row, profileColumn; alignmentProfile.resize(nColumns); typedef vector < int > IntVector; IntVector varieties(nColumns, 0), weightedVarieties(nColumns, 0); identities.resize(nColumns); int minVariety, maxVariety, minWeightedVariety, maxWeightedVariety; typedef vector < float > FloatVector; FloatVector informationContents(nColumns, 0.0f); float totalInformationContent = 0.0f; BlockMap::const_iterator b, be = blocks.end(); for (b=blocks.begin(); b!=be; ++b) { for (int blockColumn=0; blockColumn<b->first->width; ++blockColumn) { profileColumn = b->second[blockColumn]; ColumnProfile& profile = alignmentProfile[profileColumn]; // create profile for this column profile.clear(); for (row=0; row<nRows; ++row) { char ch = ScreenResidueCharacter(b->first->GetCharacterAt(blockColumn, row)); if ((p=profile.find(ch)) != profile.end()) ++(p->second); else profile[ch] = 1; } pe = profile.end(); // identity for this column if (profile.size() == 1 && profile.begin()->first != 'X') identities[profileColumn] = true; else identities[profileColumn] = false; // variety for this column int& variety = varieties[profileColumn]; variety = profile.size(); if (profile.find('X') != profile.end()) variety += profile['X'] - 1; // each 'X' counts as one variety if (blockColumn == 0 && b == blocks.begin()) { minVariety = maxVariety = variety; } else { if (variety < minVariety) minVariety = variety; else if (variety > maxVariety) maxVariety = variety; } // weighted variety for this column int& weightedVariety = weightedVarieties[profileColumn]; for (p=profile.begin(); p!=pe; ++p) { weightedVariety += (p->second * (p->second - 1) / 2) * GetBLOSUM62Score(p->first, p->first); p2 = p; for (++p2; p2!=pe; ++p2) weightedVariety += p->second * p2->second * GetBLOSUM62Score(p->first, p2->first); } if (blockColumn == 0 && b == blocks.begin()) { minWeightedVariety = maxWeightedVariety = weightedVariety; } else { if (weightedVariety < minWeightedVariety) minWeightedVariety = weightedVariety; else if (weightedVariety > maxWeightedVariety) maxWeightedVariety = weightedVariety; } // information content (calculated in bits -> logs of base 2) for this column float &columnInfo = informationContents[profileColumn]; for (p=profile.begin(); p!=pe; ++p) { static const float ln2 = log(2.0f), threshhold = 0.0001f; float residueScore = 0.0f, expFreq = StandardProbabilities[p->first]; if (expFreq > threshhold) { float obsFreq = 1.0f * p->second / nRows, freqRatio = obsFreq / expFreq; if (freqRatio > threshhold) { residueScore = obsFreq * ((float) log(freqRatio)) / ln2; columnInfo += residueScore; // information content } } } totalInformationContent += columnInfo; } } INFOMSG("Total information content of aligned blocks: " << totalInformationContent << " bits"); // now assign colors varietyColors.resize(nColumns); weightedVarietyColors.resize(nColumns); informationContentColors.resize(nColumns); double scale; for (profileColumn=0; profileColumn<nColumns; ++profileColumn) { // variety if (maxVariety == minVariety) scale = 1.0; else scale = 1.0 - 1.0 * (varieties[profileColumn] - minVariety) / (maxVariety - minVariety); varietyColors[profileColumn] = GlobalColors()->Get(Colors::eConservationMap, scale); // weighted variety if (maxWeightedVariety == minWeightedVariety) scale = 1.0; else scale = 1.0 * (weightedVarieties[profileColumn] - minWeightedVariety) / (maxWeightedVariety - minWeightedVariety); weightedVarietyColors[profileColumn] = GlobalColors()->Get(Colors::eConservationMap, scale); // information content, based on absolute scale static const float minInform = 0.10f, maxInform = 6.24f; scale = (informationContents[profileColumn] - minInform) / (maxInform - minInform); if (scale < 0.0) scale = 0.0; else if (scale > 1.0) scale = 1.0; scale = sqrt(scale); // apply non-linearity so that lower values are better distinguished informationContentColors[profileColumn] = GlobalColors()->Get(Colors::eConservationMap, scale); } basicColorsCurrent = true; fitColorsCurrent = false;}void ConservationColorer::CalculateFitConservationColors(void){ if (fitColorsCurrent || blocks.size() == 0) return; CalculateBasicConservationColors(); // also includes profile TRACEMSG("calculating fit conservation colors"); int nRows = alignment->NRows(); ColumnProfile::iterator p, pe; int row, profileColumn; typedef map < char, int > CharIntMap; vector < CharIntMap > fitScores(nColumns); int minFit, maxFit; typedef vector < float > FloatVector;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?