conservation_colorer.cpp

来自「ncbi源码」· C++ 代码 · 共 641 行 · 第 1/2 页

CPP
641
字号
/* * =========================================================================== * PRODUCTION $Log: conservation_colorer.cpp,v $ * PRODUCTION Revision 1000.3  2004/06/01 18:28:29  gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.35 * PRODUCTION * =========================================================================== *//*  $Id: conservation_colorer.cpp,v 1000.3 2004/06/01 18:28:29 gouriano Exp $* ===========================================================================**                            PUBLIC DOMAIN NOTICE*               National Center for Biotechnology Information**  This software/database is a "United States Government Work" under the*  terms of the United States Copyright Act.  It was written as part of*  the author's official duties as a United States Government employee and*  thus cannot be copyrighted.  This software/database is freely available*  to the public for use. The National Library of Medicine and the U.S.*  Government have not placed any restriction on its use or reproduction.**  Although all reasonable efforts have been taken to ensure the accuracy*  and reliability of the software and data, the NLM and the U.S.*  Government do not and cannot warrant the performance or results that*  may be obtained by using this software or data. The NLM and the U.S.*  Government disclaim all warranties, express or implied, including*  warranties of performance, merchantability or fitness for any particular*  purpose.**  Please cite the author in any work or product based on this material.** ===========================================================================** Authors:  Paul Thiessen** File Description:*      Classes to color alignment blocks by sequence conservation** ===========================================================================*/#ifdef _MSC_VER#pragma warning(disable:4018)   // disable signed/unsigned mismatch warning in MSVC#endif#include <ncbi_pch.hpp>#include <corelib/ncbistd.hpp>  // must come before C-toolkit stuff#include <corelib/ncbi_limits.h>#include <util/tables/raw_scoremat.h>#include "block_multiple_alignment.hpp"#include "conservation_colorer.hpp"#include "cn3d_tools.hpp"#include "cn3d_blast.hpp"#include <blastkar.h>           // for BLAST standard probability routines#include <objseq.h>#include <math.h>USING_NCBI_SCOPE;BEGIN_SCOPE(Cn3D)static inline char ScreenResidueCharacter(char original){    char ch = toupper(original);    switch (ch) {        case 'A': case 'R': case 'N': case 'D': case 'C':        case 'Q': case 'E': case 'G': case 'H': case 'I':        case 'L': case 'K': case 'M': case 'F': case 'P':        case 'S': case 'T': case 'W': case 'Y': case 'V':        case 'B': case 'Z':            break;        default:            ch = 'X'; // make all but natural aa's just 'X'    }    return ch;}int GetBLOSUM62Score(char a, char b){    static SNCBIFullScoreMatrix Blosum62Matrix;    static bool unpacked = false;    if (!unpacked) {        NCBISM_Unpack(&NCBISM_Blosum62, &Blosum62Matrix);        unpacked = true;    }    return Blosum62Matrix.s[ScreenResidueCharacter(a)][ScreenResidueCharacter(b)];}static int GetPSSMScore(const BLAST_Matrix *pssm, char ch, int masterIndex){    if (!pssm || masterIndex < 0 || masterIndex >= pssm->rows) {        ERRORMSG("GetPSSMScore() - invalid parameters");        return 0;    }    return pssm->matrix[masterIndex][LookupBLASTResidueNumberFromCharacter(ch)];}static map < char, float > StandardProbabilities;ConservationColorer::ConservationColorer(const BlockMultipleAlignment *parent) :    nColumns(0), basicColorsCurrent(false), fitColorsCurrent(false), alignment(parent){    if (StandardProbabilities.size() == 0) {  // initialize static stuff        static const char ncbistdaa2char[26] = {            'X', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M',            'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'X', 'Y', 'Z', 'X', 'X'        };        // calculate expected residue frequencies (standard probabilities)        // (code borrowed from SeqAlignInform() in cddutil.c)        BLAST_ScoreBlkPtr sbp = BLAST_ScoreBlkNew(Seq_code_ncbistdaa, 1);        BLAST_ResFreqPtr stdrfp = NULL;        int a;        if (!sbp) goto on_error;        sbp->read_in_matrix = TRUE;        sbp->protein_alphabet = TRUE;        sbp->posMatrix = NULL;        sbp->number_of_contexts = 1;        Nlm_Char BLOSUM62[20];        Nlm_StrCpy(BLOSUM62, "BLOSUM62");        // test C-toolkit data directory registry - which BlastScoreBlkMatFill() uses        Nlm_Char dataPath[255];        if (Nlm_FindPath("ncbi", "ncbi", "data", dataPath, 255))            TRACEMSG("FindPath() returned '" << dataPath << "'");        else            WARNINGMSG("FindPath() failed!");        if (BlastScoreBlkMatFill(sbp, BLOSUM62) != 0) goto on_error;        stdrfp = BlastResFreqNew(sbp);        if (!stdrfp) goto on_error;        if (BlastResFreqStdComp(sbp, stdrfp) != 0) goto on_error;        for (a=1; a<23; ++a) // from 'A' to 'Y'            StandardProbabilities[ncbistdaa2char[a]] = (float) stdrfp->prob[a];//        for (a=1; a<23; ++a)//            TESTMSG("std prob '" << ncbistdaa2char[a] << "' = "//                << setprecision(10) << StandardProbabilities[ncbistdaa2char[a]]);        goto cleanup;on_error:        ERRORMSG("ConservationColorer::ConservationColorer() - "            "error initializing standard residue probabilities");cleanup:        if (stdrfp) BlastResFreqDestruct(stdrfp);        if (sbp) BLAST_ScoreBlkDestruct(sbp);    }}void ConservationColorer::AddBlock(const UngappedAlignedBlock *block){    // sanity check    if (!block->IsFrom(alignment)) {        ERRORMSG("ConservationColorer::AddBlock : block is not from the associated alignment");        return;    }    blocks[block].resize(block->width);    // map block column position to profile position    for (int i=0; i<block->width; ++i) blocks[block][i] = nColumns + i;    nColumns += block->width;    basicColorsCurrent = fitColorsCurrent = false;}void ConservationColorer::CalculateBasicConservationColors(void){    if (basicColorsCurrent || blocks.size() == 0) return;    TRACEMSG("calculating basic conservation colors");    int nRows = alignment->NRows();    ColumnProfile::iterator p, pe, p2;    int row, profileColumn;    alignmentProfile.resize(nColumns);    typedef vector < int > IntVector;    IntVector varieties(nColumns, 0), weightedVarieties(nColumns, 0);    identities.resize(nColumns);    int minVariety, maxVariety, minWeightedVariety, maxWeightedVariety;    typedef vector < float > FloatVector;    FloatVector informationContents(nColumns, 0.0f);    float totalInformationContent = 0.0f;    BlockMap::const_iterator b, be = blocks.end();    for (b=blocks.begin(); b!=be; ++b) {        for (int blockColumn=0; blockColumn<b->first->width; ++blockColumn) {            profileColumn = b->second[blockColumn];            ColumnProfile& profile = alignmentProfile[profileColumn];            // create profile for this column            profile.clear();            for (row=0; row<nRows; ++row) {                char ch = ScreenResidueCharacter(b->first->GetCharacterAt(blockColumn, row));                if ((p=profile.find(ch)) != profile.end())                    ++(p->second);                else                    profile[ch] = 1;            }            pe = profile.end();            // identity for this column            if (profile.size() == 1 && profile.begin()->first != 'X')                identities[profileColumn] = true;            else                identities[profileColumn] = false;            // variety for this column            int& variety = varieties[profileColumn];            variety = profile.size();            if (profile.find('X') != profile.end())                variety += profile['X'] - 1; // each 'X' counts as one variety            if (blockColumn == 0 && b == blocks.begin()) {                minVariety = maxVariety = variety;            } else {                if (variety < minVariety) minVariety = variety;                else if (variety > maxVariety) maxVariety = variety;            }            // weighted variety for this column            int& weightedVariety = weightedVarieties[profileColumn];            for (p=profile.begin(); p!=pe; ++p) {                weightedVariety +=                    (p->second * (p->second - 1) / 2) * GetBLOSUM62Score(p->first, p->first);                p2 = p;                for (++p2; p2!=pe; ++p2)                    weightedVariety +=                        p->second * p2->second * GetBLOSUM62Score(p->first, p2->first);            }            if (blockColumn == 0 && b == blocks.begin()) {                minWeightedVariety = maxWeightedVariety = weightedVariety;            } else {                if (weightedVariety < minWeightedVariety) minWeightedVariety = weightedVariety;                else if (weightedVariety > maxWeightedVariety) maxWeightedVariety = weightedVariety;            }            // information content (calculated in bits -> logs of base 2) for this column            float &columnInfo = informationContents[profileColumn];            for (p=profile.begin(); p!=pe; ++p) {                static const float ln2 = log(2.0f), threshhold = 0.0001f;                float residueScore = 0.0f, expFreq = StandardProbabilities[p->first];                if (expFreq > threshhold) {                    float obsFreq = 1.0f * p->second / nRows,                          freqRatio = obsFreq / expFreq;                    if (freqRatio > threshhold) {                        residueScore = obsFreq * ((float) log(freqRatio)) / ln2;                        columnInfo += residueScore; // information content                    }                }            }            totalInformationContent += columnInfo;        }    }    INFOMSG("Total information content of aligned blocks: " << totalInformationContent << " bits");    // now assign colors    varietyColors.resize(nColumns);    weightedVarietyColors.resize(nColumns);    informationContentColors.resize(nColumns);    double scale;    for (profileColumn=0; profileColumn<nColumns; ++profileColumn) {        // variety        if (maxVariety == minVariety)            scale = 1.0;        else            scale = 1.0 - 1.0 * (varieties[profileColumn] - minVariety) / (maxVariety - minVariety);        varietyColors[profileColumn] = GlobalColors()->Get(Colors::eConservationMap, scale);        // weighted variety        if (maxWeightedVariety == minWeightedVariety)            scale = 1.0;        else            scale = 1.0 * (weightedVarieties[profileColumn] - minWeightedVariety) /                (maxWeightedVariety - minWeightedVariety);        weightedVarietyColors[profileColumn] = GlobalColors()->Get(Colors::eConservationMap, scale);        // information content, based on absolute scale        static const float minInform = 0.10f, maxInform = 6.24f;        scale = (informationContents[profileColumn] - minInform) / (maxInform - minInform);        if (scale < 0.0) scale = 0.0;        else if (scale > 1.0) scale = 1.0;        scale = sqrt(scale);    // apply non-linearity so that lower values are better distinguished        informationContentColors[profileColumn] = GlobalColors()->Get(Colors::eConservationMap, scale);    }    basicColorsCurrent = true;    fitColorsCurrent = false;}void ConservationColorer::CalculateFitConservationColors(void){    if (fitColorsCurrent || blocks.size() == 0) return;    CalculateBasicConservationColors();     // also includes profile    TRACEMSG("calculating fit conservation colors");    int nRows = alignment->NRows();    ColumnProfile::iterator p, pe;    int row, profileColumn;    typedef map < char, int > CharIntMap;    vector < CharIntMap > fitScores(nColumns);    int minFit, maxFit;    typedef vector < float > FloatVector;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?