aln_scoring.cpp

来自「ncbi源码」· C++ 代码 · 共 362 行

CPP
362
字号
/*
 * ===========================================================================
 * PRODUCTION $Log: aln_scoring.cpp,v $
 * PRODUCTION Revision 1000.3  2004/06/01 21:07:04  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.11
 * PRODUCTION
 * ===========================================================================
 */

/*  $Id: aln_scoring.cpp,v 1000.3 2004/06/01 21:07:04 gouriano Exp $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Authors:  Andrey Yazhuk
 */

#include <ncbi_pch.hpp>
#include <gui/widgets/aln_multiple/alnmulti_settings.hpp>
#include <gui/widgets/aln_multiple/aln_scoring.hpp>

#include <math.h>
#include <stdio.h>


BEGIN_NCBI_SCOPE
USING_SCOPE(objects);


////////////////////////////////////////////////////////////////////////////////
/// CSimpleScoringMethod

/// Sizes the per-character histogram to 256 entries (one per possible byte
/// value) and enables the "ignore empty space" option.
CSimpleScoringMethod::CSimpleScoringMethod()
:   m_Space(0),
    m_Gap(0)
{
    m_vCharCounts.resize(256);
    SetOptions(fIgnoreEmptySpace /*| fIgnoreGaps*/);
}


CSimpleScoringMethod::~CSimpleScoringMethod()
{
}


/// Stores the option flags and derives the characters that are skipped while
/// scoring: ' ' when fIgnoreEmptySpace is set and '-' when fIgnoreGaps is set.
/// A disabled option maps to the character code 0.
void    CSimpleScoringMethod::SetOptions(int options)
{
    m_Options = options;
    m_Space = (m_Options & fIgnoreEmptySpace) ? ' ' : 0;
    m_Gap = (m_Options & fIgnoreGaps) ? '-' : 0;
}


/// Builds a table of "size" colors. Entries [0, size - 2] form a red gradient
/// that gets lighter as the index grows; the last entry is light gray.
///
/// A non-positive "size" is a caller error (asserted in debug builds); in
/// release builds it yields an empty table rather than an out-of-range write.
void    CSimpleScoringMethod::CreateColorTable(int size)
{
    _ASSERT(size > 0);
    if( size <= 0 ) {
        m_vColors.clear();
        return;
    }

    m_vColors.resize(size);

    const int last = size - 1;
    // with a single entry the gradient loop below does not run, so the step
    // is irrelevant - just avoid dividing by zero
    const float K = (last > 0) ? 1.0f / last : 0.0f;

    // imperfect agreement - gradient red
    for( int i = 0;  i < last;  i++ ) {
        float score = i * K;
        float gray = score * 0.8f;
        m_vColors[i] = CGlColor(1.0f, gray, gray);
    }
    // perfect agreement - light gray
    m_vColors[last] = CGlColor(0.9f, 0.9f, 0.9f);
}


string  CSimpleScoringMethod::GetName()
{
    return "Strict column agreement";
}


/// Scores every character of one alignment column by the fraction of
/// non-ignored characters in that column that are identical to it.
///
/// @param cons       not used by this method
/// @param column     characters of the column, one per row
/// @param col_score  always set to 0.0 (column score is currently disabled)
/// @param scores     output, must have the same size as "column"; ignored
///                   characters (space / gap, see SetOptions()) receive 1.0
void    CSimpleScoringMethod::CalculateScores(char cons, const string& column,
                                     TScore& col_score, TScoreVector& scores)
{
    _ASSERT(scores.size() == column.size()  &&  m_vCharCounts.size()  == 256);

    // reset histogram
    size_t space_n = 0;
    fill(m_vCharCounts.begin(), m_vCharCounts.end(), 0);

    // calculate histogram
    ITERATE(string, it, column) {
        char c = *it;
        if(c != m_Space  &&  c != m_Gap)  {
            // plain char may be signed: go through unsigned char so that
            // bytes >= 0x80 index inside the 256-entry table
            ++m_vCharCounts[static_cast<unsigned char>(c)];
        } else {
            space_n++;
        }
    }

    // calculate column score
    /*int max_count = 0, sum = 0;
    for( int i = 0; i < 256; i++ )  { // ### performance
        int n = m_vCharCounts[i];
        if(n) {
            sum += n * n;
            max_count = max(n, max_count);
        }
    } */
    size_t total = column.size() - space_n;
    //col_score = sqrt((TScore)sum) / total;
    col_score = 0.0; //###

    // calculate individual scores
    // ("total" cannot be zero in the dividing branch: reaching it means at
    // least one non-ignored character was counted above)
    for( size_t i = 0; i < column.size(); i++  )  {
        char c = column[i];
        if(c != m_Space  &&  c != m_Gap)  {
            scores[i] =
                ((TScore) m_vCharCounts[static_cast<unsigned char>(c)]) / total;
        } else  {
            scores[i] = 1.0;
        }
    }
}


////////////////////////////////////////////////////////////////////////////////
/// CSNPScoringMethod

string  CSNPScoringMethod::GetName()
{
    return "SNP Highlighting";
}


/// Scores every character of one alignment column as 1.0 when it equals the
/// consensus character "cons" (or is an ignored space / gap) and 0.0
/// otherwise. "col_score" is left untouched.
void    CSNPScoringMethod::CalculateScores(char cons, const string& column,
                                     TScore& col_score, TScoreVector& scores)
{
    _ASSERT(scores.size() == column.size());

    for( size_t i = 0; i < column.size(); i++  )  {
        char c = column[i];
        if(c != m_Space  &&  c != m_Gap)  {
            scores[i] = (c == cons) ? 1.0 : 0.0;
        } else {
            scores[i] = 1.0;
        }
    }
}


/// Maps a score to an entry of the table built by CreateColorTable().
/// The index is floor(score * table_size), clamped to the valid range so that
/// score == 1.0 (and any out-of-range or NaN score) cannot index outside of
/// the table. The table must not be empty.
const CGlColor& CSimpleScoringMethod::GetColorForScore(TScore score) const
{
    _ASSERT(m_vColors.size());

    const size_t last = m_vColors.size() - 1;
    const double scaled = floor(score * m_vColors.size());

    if( ! (scaled > 0.0) ) { // zero, negative or NaN
        return m_vColors[0];
    }
    const size_t ind = (scaled >= static_cast<double>(last))
                            ? last : static_cast<size_t>(scaled);
    return m_vColors[ind];
}


////////////////////////////////////////////////////////////////////////////////
/// CScoreCache

CScoreCache::CScoreCache()
:   m_pAlnVec(NULL),
    m_pMethod(NULL),
    m_GradNumber(16)
{
    // x_AllocBuffer() may read m_RowLength before anything has been stored
    // in it, so the buffer bookkeeping must start from a defined state
    m_BufferStart = 0;
    m_RowLength = 0;
}


/// Sets the number of gradations used to quantize scores in
/// CalculateScores(); must be in the range (1, 0xFFFF].
void    CScoreCache::SetGradNumber(int grad_n)
{
    _ASSERT(grad_n > 1  && grad_n <= 0xFFFF);
    m_GradNumber = grad_n;
}


void    CScoreCache::SetScoringMethod(IScoringMethod *method)
{
    m_pMethod = method;
}


IScoringMethod*    CScoreCache::GetScoringMethod()
{
    return m_pMethod;
}


const IScoringMethod*    CScoreCache::GetScoringMethod() const
{
    return m_pMethod;
}


void CScoreCache::SetAlnVec(const CAlnVec* aln_vec)
{
    m_pAlnVec = aln_vec;
}


/// Calculates scores for the given CAlnVec object and saves results in form of
/// TScoreColl objects.
///
/// The alignment is processed in pages of kPageSize columns: each page is
/// fetched into the sequence buffer, every column of the page is passed to the
/// scoring method and the resulting per-row scores are quantized to
/// m_GradNumber gradations and appended to the per-row collections.
/// Both an alignment and a scoring method must have been set.
void CScoreCache::CalculateScores()
{
    _ASSERT(m_pAlnVec);
    _ASSERT(m_pMethod);

    CStopWatch sw;
    sw.Start();

    TSeqPos start = m_pAlnVec->GetAlnStart();
    TSeqPos stop = m_pAlnVec->GetAlnStop();
    TNumrow row_n = m_pAlnVec->GetNumRows();

    // preparing score collections
    m_vScoreColls.resize(row_n);
    NON_CONST_ITERATE(TScoreCollVector, itC, m_vScoreColls)  {
        itC->SetFrom(start); // clear and initialize
    }

    string column(row_n, '\0');
    TScore col_score = 0;
    TScoreVector v_col_scores(row_n, 0.0f);

    const TSeqPos kPageSize = 256;
    x_AllocBuffer(kPageSize);

    TScore grad_n = m_GradNumber;
    TNumrow cons_row = m_pAlnVec->GetAnchor();

    // iterate from "start" to "stop" using "sliding buffer"
    // "stop" is inclusive, so the condition must be "<=": with "<" the last
    // column was skipped whenever a page ended exactly one column before it
    for( TSeqPos pos = start;  pos <= stop; )    {
        TSeqPos pos_stop = min(pos + kPageSize -1, stop);
        x_UpdateBuffer(pos, pos_stop); // fetch next page in Seq Buffer

        for( TSeqPos p = pos;  p <= pos_stop ;  p++ )    { // for each column
            x_BufferGetColumn(p, column);

            char cons = (cons_row > -1) ? column[cons_row] : 0;
            m_pMethod->CalculateScores(cons, column, col_score, v_col_scores);

            // append scores to collections
            for(TNumrow r = 0;  r < row_n;  r++ )  {
                TScore sc = v_col_scores[r];
                sc = ((int) (sc * grad_n)) / grad_n; // quantize
                m_vScoreColls[r].push_back(sc);
            }
        }

        if(pos_stop == stop) {
            break; // last page done; also avoids wrapping "pos" past "stop"
        }
        pos = pos_stop + 1;
    }

    int total_int = 0;
    NON_CONST_ITERATE(TScoreCollVector, itC, m_vScoreColls)  {
        total_int += static_cast<int>(itC->size());
    }

    // the fixed text plus two decimal ints always fits into the buffer
    char s[128];
    sprintf(s, "CScoreCache::CalculateScores() - total rows - %d intervals %d", row_n, total_int);
    LOG_POST(s);

    CAlnMultiUtils::ReportElapced("CScoreCache::CalculateScores()", sw);
}


/// Returns the score collection calculated for the given row;
/// "row" must be a valid index into the collections built by CalculateScores().
const CScoreCache::TScoreColl&   CScoreCache::GetScores(TNumrow row) const
{
    _ASSERT(row >= 0  && row < (TNumrow) m_vScoreColls.size());

    return m_vScoreColls[row];
}


////////////////////////////////////////////////////////////////////////////////
/// Sequence buffer management routines

/// Returns the buffered character of row "row" at alignment position "pos";
/// "pos" must lie inside the currently buffered page.
inline char CScoreCache::x_BufferGetSeq(TSeqPos pos, TNumrow row) const
{
    _ASSERT(pos >= m_BufferStart  &&  pos < m_BufferStart + m_RowLength);
    _ASSERT(row >= 0  &&  row < (TNumrow) m_vRows.size());

    return m_vRows[row][pos - m_BufferStart];
}


/// Makes sure the buffer holds one string of "row_len" characters per
/// alignment row; does nothing if it already has that shape.
void CScoreCache::x_AllocBuffer(TSeqPos row_len)
{
    _ASSERT(m_pAlnVec);

    int rows_n = m_pAlnVec->GetNumRows();
    if(rows_n != (TNumrow) m_vRows.size()  ||  m_RowLength != row_len)    {
        m_RowLength = row_len;
        m_vRows.resize(rows_n);
        NON_CONST_ITERATE(vector<string>, itR, m_vRows)   {
            itR->resize(m_RowLength);
        }
    }
}


void CScoreCache::x_FreeBuffer()
{
    m_vRows.clear();
}


/// Loads alignment positions [start, stop] (inclusive) of every row into the
/// buffer. The range must not be longer than the allocated row length.
void CScoreCache::x_UpdateBuffer(TSeqPos start, TSeqPos stop)
{
    _ASSERT(m_pAlnVec);
    _ASSERT( (stop - start + 1) <= m_RowLength);

    m_BufferStart = start;

    CAlnVec::TSignedRange   range(start, stop);
    TNumrow row_n = (TNumrow) m_vRows.size();
    for( TNumrow r = 0;  r < row_n; r++ )  {
        m_pAlnVec->GetAlnSeqString(m_vRows[r], r, range);
    }
}


/// Copies the buffered characters at alignment position "pos" into "column",
/// one character per row. "column" must already hold at least as many
/// characters as there are buffered rows.
void CScoreCache::x_BufferGetColumn(TSeqPos pos, string& column) const
{
    _ASSERT(pos >= m_BufferStart  && pos < m_BufferStart + m_RowLength);
    _ASSERT(column.size() >= m_vRows.size());

    size_t col = pos - m_BufferStart;
    for(size_t row = 0; row < m_vRows.size(); row++ )   {
        column[row] = m_vRows[row][col];
    }
}


END_NCBI_SCOPE

/*
 * ===========================================================================
 * $Log: aln_scoring.cpp,v $
 * Revision 1000.3  2004/06/01 21:07:04  gouriano
 * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.11
 *
 * Revision 1.11  2004/05/21 22:27:52  gorelenk
 * Added PCH ncbi_pch.hpp
 *
 * Revision 1.10  2004/04/02 16:38:11  yazhuk
 * Added to CSimpleScoringMethod options for ignoring empty space and gaps;
 * Added CSNPScoringMethod .
 *
 * Revision 1.9  2004/03/18 17:09:03  yazhuk
 * Added GetScoringMethod()
 *
 * Revision 1.8  2004/02/18 02:16:21  ucko
 * Tweak to avoid trying to invoke sqrt on an int.
 *
 * Revision 1.7  2004/02/17 15:20:51  yazhuk
 * Optimized scores calculation
 *
 * Revision 1.6  2004/02/11 17:43:09  yazhuk
 * Implemented GetName(); added comments
 *
 * Revision 1.5  2004/02/11 15:27:42  yazhuk
 * Changed color table generation
 *
 * Revision 1.4  2003/11/14 15:45:48  ucko
 * Likewise fix initialization of v_col_scores in
 * CScoreCache::CalculateScores for Compaq's compiler.
 * Qualify method names in previous log messages.
 *
 * Revision 1.3  2003/11/14 13:10:14  ucko
 * Tweak constructor of vCounts in CSimpleScoringMethod::CalculateScores
 * for Compaq's compiler.
 *
 * Revision 1.2  2003/10/11 18:20:34  ucko
 * Fixes for GCC 2.95: #include <stdio.h> for sprintf(); tweak constr. of
 * column in CScoreCache::CalculateScores to avoid triggering an inappropriate
 * template.
 *
 * Revision 1.1  2003/10/10 19:06:25  yazhuk
 * Initial revision
 *
 * ===========================================================================
 */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?