aln_scoring.cpp
来自「ncbi源码」· C++ 代码 · 共 362 行
CPP
362 行
/*
 * ===========================================================================
 * PRODUCTION $Log: aln_scoring.cpp,v $
 * PRODUCTION Revision 1000.3  2004/06/01 21:07:04  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.11
 * PRODUCTION
 * ===========================================================================
 */

/*  $Id: aln_scoring.cpp,v 1000.3 2004/06/01 21:07:04 gouriano Exp $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Authors:  Andrey Yazhuk
 */

#include <ncbi_pch.hpp>
#include <gui/widgets/aln_multiple/alnmulti_settings.hpp>
#include <gui/widgets/aln_multiple/aln_scoring.hpp>

#include <math.h>
#include <stdio.h>

BEGIN_NCBI_SCOPE
USING_SCOPE(objects);


/// Creates a scoring method with a 256-entry character histogram and the
/// "ignore empty space" option switched on.
CSimpleScoringMethod::CSimpleScoringMethod()
:   m_Space(0),
    m_Gap(0)
{
    m_vCharCounts.resize(256);
    SetOptions(fIgnoreEmptySpace /*| fIgnoreGaps*/);
}


CSimpleScoringMethod::~CSimpleScoringMethod()
{
}


/// Stores the option flags and derives from them the characters that are
/// excluded from scoring: ' ' when fIgnoreEmptySpace is set, '-' when
/// fIgnoreGaps is set. A disabled option is represented by character 0.
void CSimpleScoringMethod::SetOptions(int options)
{
    m_Options = options;
    m_Space = (m_Options & fIgnoreEmptySpace) ? ' ' : 0;
    m_Gap = (m_Options & fIgnoreGaps) ? '-' : 0;
}


/// Rebuilds the color table with "size" entries.
/// All entries but the last form a red gradient; the last entry (perfect
/// agreement) is light gray.
/// @param size
///   Requested number of colors. Values below 1 are invalid; they trigger
///   an assertion in debug builds and are treated as 1 otherwise, so that
///   the table always holds at least one color.
void CSimpleScoringMethod::CreateColorTable(int size)
{
    _ASSERT(size > 0);
    const int color_n = (size > 0) ? size : 1;

    m_vColors.resize(color_n);

    // imperfect agreement - gradient red
    if(color_n > 1) {
        // the step is computed only here, a single-entry table would
        // otherwise divide by zero
        const float K = 1.0f / (color_n - 1);
        for( int i = 0; i < color_n - 1; i++ ) {
            float score = i * K;
            float gray = score * 0.8f;
            m_vColors[i] = CGlColor(1.0f, gray, gray);
        }
    }
    // perfect agreement - light gray
    m_vColors[color_n - 1] = CGlColor(0.9f, 0.9f, 0.9f);
}


string CSimpleScoringMethod::GetName()
{
    return "Strict column agreement";
}


/// Scores every character of an alignment column by the fraction of scored
/// characters in the column that are equal to it.
/// @param cons
///   Consensus character; not used by this method.
/// @param column
///   Characters of one alignment column, one per row.
/// @param col_score
///   Always set to 0 (the column score is currently not calculated).
/// @param scores
///   Receives one score per row; must have the same size as "column".
///   Characters equal to m_Space or m_Gap get the score 1.
void CSimpleScoringMethod::CalculateScores(char cons, const string& column,
                                           TScore& col_score,
                                           TScoreVector& scores)
{
    _ASSERT(scores.size() == column.size()  &&  m_vCharCounts.size() == 256);

    // reset histogram
    size_t space_n = 0;
    fill(m_vCharCounts.begin(), m_vCharCounts.end(), 0);

    // calculate histogram
    ITERATE(string, it, column) {
        char c = *it;
        if(c != m_Space  &&  c != m_Gap) {
            // go through unsigned char: plain char may be signed, and a
            // negative value would index far outside the 256-entry table
            ++m_vCharCounts[static_cast<unsigned char>(c)];
        } else {
            space_n++;
        }
    }

    // calculate column score
    /*int max_count = 0, sum = 0;
    for( int i = 0; i < 256; i++ ) { // ### performance
        int n = m_vCharCounts[i];
        if(n) {
            sum += n * n;
            max_count = max(n, max_count);
        }
    } */
    size_t total = column.size() - space_n;
    //col_score = sqrt((TScore)sum) / total;
    col_score = 0.0; //###

    // calculate individual scores
    // ("total" cannot be 0 in the division below: reaching it means that
    //  at least one character has been counted)
    for( size_t i = 0; i < column.size(); i++ ) {
        char c = column[i];
        if(c != m_Space  &&  c != m_Gap) {
            scores[i] =
                ((TScore) m_vCharCounts[static_cast<unsigned char>(c)]) / total;
        } else {
            scores[i] = 1.0;
        }
    }
}


string CSNPScoringMethod::GetName()
{
    return "SNP Highlighting";
}


/// Scores every character of an alignment column by comparing it with the
/// consensus character.
/// @param cons
///   Consensus character for the column.
/// @param column
///   Characters of one alignment column, one per row.
/// @param col_score
///   Not modified by this method.
/// @param scores
///   Receives one score per row; must have the same size as "column".
///   The score is 1 for characters equal to "cons", m_Space or m_Gap and
///   0 for everything else.
void CSNPScoringMethod::CalculateScores(char cons, const string& column,
                                        TScore& col_score,
                                        TScoreVector& scores)
{
    _ASSERT(scores.size() == column.size());

    for( size_t i = 0; i < column.size(); i++ ) {
        char c = column[i];
        if(c != m_Space  &&  c != m_Gap) {
            scores[i] = (c == cons) ? 1.0 : 0.0;
        } else {
            scores[i] = 1.0;
        }
    }
}


/// Returns the color table entry for the given score.
/// The index is floor(score * table size), clamped to the valid range:
/// scores that are not greater than 0 (including NaN) select the first
/// entry, scores of 1 and above select the last one.
/// The color table must not be empty.
const CGlColor& CSimpleScoringMethod::GetColorForScore(TScore score) const
{
    _ASSERT(m_vColors.size());

    const size_t color_n = m_vColors.size();
    size_t ind = 0;
    if(score > 0) {
        // clamp before converting, an out-of-range floating point value
        // must not be converted to an integer type
        const double scaled = floor(score * color_n);
        ind = (scaled >= (double) color_n) ? color_n - 1 : (size_t) scaled;
    }
    return m_vColors[ind];
}


/// Creates a cache with no alignment, no scoring method, 16 gradations
/// and an empty sequence buffer.
CScoreCache::CScoreCache()
:   m_pAlnVec(NULL),
    m_pMethod(NULL),
    m_GradNumber(16)
{
    // give the buffer bookkeeping fields defined values, x_AllocBuffer()
    // reads m_RowLength before anything else assigns it
    m_BufferStart = 0;
    m_RowLength = 0;
}


/// Sets the number of gradations used to quantize scores in
/// CalculateScores().
void CScoreCache::SetGradNumber(int grad_n)
{
    _ASSERT(grad_n > 1  &&  grad_n <= 0xFFFF);
    m_GradNumber = grad_n;
}


void CScoreCache::SetScoringMethod(IScoringMethod *method)
{
    m_pMethod = method;
}


IScoringMethod* CScoreCache::GetScoringMethod()
{
    return m_pMethod;
}


const IScoringMethod* CScoreCache::GetScoringMethod() const
{
    return m_pMethod;
}


void CScoreCache::SetAlnVec(const CAlnVec* aln_vec)
{
    m_pAlnVec = aln_vec;
}


/// Calculates scores for the given CAlnVec object and saves results in form of
/// TScoreColl objects.
/// The alignment is processed in pages of 256 columns; for every column
/// the scoring method is invoked and the resulting per-row scores are
/// quantized to m_GradNumber gradations before being appended to the
/// row's score collection. Both the alignment and the scoring method must
/// have been set.
void CScoreCache::CalculateScores()
{
    _ASSERT(m_pAlnVec);
    _ASSERT(m_pMethod);

    CStopWatch sw;
    sw.Start();

    TSeqPos start = m_pAlnVec->GetAlnStart();
    TSeqPos stop = m_pAlnVec->GetAlnStop();
    TNumrow row_n = m_pAlnVec->GetNumRows();

    // preparing score collections
    m_vScoreColls.resize(row_n);
    NON_CONST_ITERATE(TScoreCollVector, itC, m_vScoreColls) {
        itC->SetFrom(start); // clear and initialize
    }

    string column(row_n, '\0');
    TScore col_score = 0;
    TScoreVector v_col_scores(row_n, 0.0f);

    const TSeqPos kPageSize = 256;
    x_AllocBuffer(kPageSize);

    TScore grad_n = m_GradNumber;
    TNumrow cons_row = m_pAlnVec->GetAnchor();

    // iterate from "start" to "stop" using "sliding buffer"
    // "stop" is inclusive (see the page and column loops below), so the
    // condition has to be "<=": with "<" the last column is lost whenever
    // a page starts exactly at "stop", e.g. for a single-column alignment
    for( TSeqPos pos = start;  pos <= stop; ) {
        TSeqPos pos_stop = min(pos + kPageSize - 1, stop);
        x_UpdateBuffer(pos, pos_stop); // fetch next page in Seq Buffer

        for( TSeqPos p = pos;  p <= pos_stop;  p++ ) { // for each column
            x_BufferGetColumn(p, column);
            char cons = (cons_row > -1) ? column[cons_row] : 0;
            m_pMethod->CalculateScores(cons, column, col_score, v_col_scores);

            // append scores to collections
            for( TNumrow r = 0;  r < row_n;  r++ ) {
                TScore sc = v_col_scores[r];
                // quantize the score to a multiple of 1 / grad_n
                sc = ((int) (sc * grad_n)) / grad_n;
                m_vScoreColls[r].push_back(sc);
            }
        }
        pos = pos_stop + 1;
    }

    int total_int = 0;
    NON_CONST_ITERATE(TScoreCollVector, itC, m_vScoreColls) {
        total_int += (int) itC->size();
    }
    char s[128];
    sprintf(s, "CScoreCache::CalculateScores() - total rows - %d intervals %d", row_n, total_int);
    LOG_POST(s);
    CAlnMultiUtils::ReportElapced("CScoreCache::CalculateScores()", sw);
}


/// Returns the score collection calculated for the given row.
const CScoreCache::TScoreColl& CScoreCache::GetScores(TNumrow row) const
{
    _ASSERT(row >= 0  &&  row < (TNumrow) m_vScoreColls.size());
    return m_vScoreColls[row];
}


////////////////////////////////////////////////////////////////////////////////
/// Sequence buffer management routines

/// Returns the buffered character of the given row at alignment position
/// "pos"; the position must lie inside the buffered window.
inline char CScoreCache::x_BufferGetSeq(TSeqPos pos, TNumrow row) const
{
    _ASSERT(pos >= m_BufferStart  &&  pos < m_BufferStart + m_RowLength);
    _ASSERT(row >= 0  &&  row < (TNumrow) m_vRows.size());

    return m_vRows[row][pos - m_BufferStart];
}


/// Makes sure the buffer holds one string of length "row_len" per
/// alignment row. Nothing is done when the buffer already has the required
/// number of rows and row length.
void CScoreCache::x_AllocBuffer(TSeqPos row_len)
{
    _ASSERT(m_pAlnVec);

    int rows_n = m_pAlnVec->GetNumRows();
    if(rows_n != (TNumrow) m_vRows.size()  ||  m_RowLength != row_len) {
        m_RowLength = row_len;
        m_vRows.resize(rows_n);
        NON_CONST_ITERATE(vector<string>, itR, m_vRows) {
            itR->resize(m_RowLength);
        }
    }
}


void CScoreCache::x_FreeBuffer()
{
    m_vRows.clear();
}


/// Loads the alignment range [start, stop] of every row into the buffer.
/// The range must not be longer than the allocated row length.
void CScoreCache::x_UpdateBuffer(TSeqPos start, TSeqPos stop)
{
    _ASSERT(m_pAlnVec);
    _ASSERT( (stop - start + 1) <= m_RowLength);

    m_BufferStart = start;
    CAlnVec::TSignedRange range(start, stop);

    TNumrow row_n = (TNumrow) m_vRows.size();
    for( TNumrow r = 0;  r < row_n;  r++ ) {
        m_pAlnVec->GetAlnSeqString(m_vRows[r], r, range);
    }
}


/// Copies the buffered characters at alignment position "pos" into
/// "column", one character per row. "column" is not resized and must
/// provide at least one character per buffered row.
void CScoreCache::x_BufferGetColumn(TSeqPos pos, string& column) const
{
    _ASSERT(pos >= m_BufferStart  &&  pos < m_BufferStart + m_RowLength);
    _ASSERT(column.size() >= m_vRows.size());

    size_t col = pos - m_BufferStart;
    for( size_t row = 0;  row < m_vRows.size();  row++ ) {
        column[row] = m_vRows[row][col];
    }
}


END_NCBI_SCOPE

/*
 * ===========================================================================
 * $Log: aln_scoring.cpp,v $
 * Revision 1000.3  2004/06/01 21:07:04  gouriano
 * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.11
 *
 * Revision 1.11  2004/05/21 22:27:52  gorelenk
 * Added PCH ncbi_pch.hpp
 *
 * Revision 1.10  2004/04/02 16:38:11  yazhuk
 * Added to CSimpleScoringMethod options for ignoring empty space and gaps;
 * Added CSNPScoringMethod .
 *
 * Revision 1.9  2004/03/18 17:09:03  yazhuk
 * Added GetScoringMethod()
 *
 * Revision 1.8  2004/02/18 02:16:21  ucko
 * Tweak to avoid trying to invoke sqrt on an int.
 *
 * Revision 1.7  2004/02/17 15:20:51  yazhuk
 * Optimized scores calculation
 *
 * Revision 1.6  2004/02/11 17:43:09  yazhuk
 * Implemented GetName(); added comments
 *
 * Revision 1.5  2004/02/11 15:27:42  yazhuk
 * Changed color table generation
 *
 * Revision 1.4  2003/11/14 15:45:48  ucko
 * Likewise fix initialization of v_col_scores in
 * CScoreCache::CalculateScores for Compaq's compiler.
 * Qualify method names in previous log messages.
 *
 * Revision 1.3  2003/11/14 13:10:14  ucko
 * Tweak constructor of vCounts in CSimpleScoringMethod::CalculateScores
 * for Compaq's compiler.
 *
 * Revision 1.2  2003/10/11 18:20:34  ucko
 * Fixes for GCC 2.95: #include <stdio.h> for sprintf(); tweak constr. of
 * column in CScoreCache::CalculateScores to avoid triggering an inappropriate
 * template.
 *
 * Revision 1.1  2003/10/10 19:06:25  yazhuk
 * Initial revision
 *
 * ===========================================================================
 */
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?