📄 splign_hit.cpp
字号:
/* * =========================================================================== * PRODUCTION $Log: splign_hit.cpp,v $ * PRODUCTION Revision 1000.0 2004/06/01 18:12:48 gouriano * PRODUCTION PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.8 * PRODUCTION * =========================================================================== *//* $Id: splign_hit.cpp,v 1000.0 2004/06/01 18:12:48 gouriano Exp $* ===========================================================================** public DOMAIN NOTICE * National Center for Biotechnology Information* * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular* purpose. * * Please cite the author in any work or product based on this material. ** ===========================================================================** Author: Yuri Kapustin** File Description:* Temporary code - will be part of the hit filtering library**/#include <ncbi_pch.hpp>#include <algo/align/splign/splign_hit.hpp>#include <algo/align/align_exception.hpp>#include <corelib/ncbistre.hpp>#include <objects/seqloc/Seq_id.hpp>#include <objects/seqalign/Seq_align.hpp>#include <objects/seqalign/Dense_seg.hpp>#include <algorithm>#include <numeric>#include <math.h>BEGIN_NCBI_SCOPEUSING_SCOPE(objects);CHit::CHit(){ x_InitDefaults();}void CHit::x_InitDefaults(){ m_Idnty = m_Score = m_Value = m_MM = m_Gaps = 0; m_GroupID = -1; m_MaxDistClustID = -1; m_an[0] = m_an[1] = m_an[2] = m_an[3] = 0; m_ai[0] = m_ai[1] = m_ai[2] = m_ai[3] = 0;}// constructs new hit by merging two existingCHit::CHit(const CHit& a, const CHit& b){ x_InitDefaults(); bool bsa = a.IsStraight(); bool bsb = b.IsStraight(); bool bBothStraight = (bsa && bsb); bool bBothInverse = (!bsa && !bsb); if(!bBothStraight && !bBothInverse) { NCBI_THROW( CAlgoAlignException, eInternal, "Cannot merge hits with diferent strands"); } m_an[0] = min(a.m_an[0],min(a.m_an[1],min(b.m_an[0],b.m_an[1]))); m_an[1] = max(a.m_an[0],max(a.m_an[1],max(b.m_an[0],b.m_an[1]))); m_an[2] = min(a.m_an[2],min(a.m_an[3],min(b.m_an[2],b.m_an[3]))); m_an[3] = max(a.m_an[2],max(a.m_an[3],max(b.m_an[2],b.m_an[3]))); // make size the same int n1 = m_an[1]-m_an[0]; int n2 = m_an[3]-m_an[2]; if(n1 != n2) { int n = min(n1,n2); m_an[1] = m_an[0] + n; m_an[3] = m_an[2] + n; } if(bBothInverse) { int k = m_an[2]; m_an[2] = m_an[3]; m_an[3] = k; } // adjust parameters m_Query = a.m_Query; m_Subj = a.m_Subj; m_GroupID = a.m_GroupID; m_Score = a.m_Score + b.m_Score; m_anLength[0] = abs(m_an[1]-m_an[0]) + 1; m_anLength[1] = abs(m_an[3]-m_an[2]) + 1; m_Length = max(m_anLength[0], m_anLength[1]); m_Value = (double(a.m_Length)*a.m_Value + double(b.m_Length)*b.m_Value) / (a.m_Length + b.m_Length); m_Idnty = (a.m_Length*a.m_Idnty + b.m_Length*b.m_Idnty) / (a.m_Length + b.m_Length); m_Gaps = a.m_Gaps + b.m_Gaps; m_MM = a.m_MM + b.m_MM; SetEnvelop();}CHit::CHit(const string& strTemplate){ x_InitDefaults(); int nCount = 0; string::const_iterator ii = strTemplate.begin(), ii_end = strTemplate.end(), jj = ii; while( jj < ii_end ) { ii = find(jj, ii_end, '\t'); string s (jj, ii); if(s == "-") { s = "-1"; } CNcbiIstrstream ss (s.c_str()); switch(nCount++) { case 0: m_Query = s; break; case 1: m_Subj = s; break; case 2: ss >> m_Idnty; break; case 3: ss >> m_Length; break; case 4: ss >> m_MM; break; case 5: ss >> m_Gaps; break; case 6: ss >> m_an[0]; break; case 7: ss >> m_an[1]; break; case 8: ss >> m_an[2]; break; case 9: ss >> m_an[3]; break; case 10: ss >> m_Value; break; case 11: ss >> m_Score; break; default: { NCBI_THROW( CAlgoAlignException, eFormat, "Too many fields while parsing hit line."); } } jj = ii + 1; } if (nCount < 12) { NCBI_THROW( CAlgoAlignException, eFormat, "One or more fields missing."); } // u-turn the hit if necessary if(m_an[0] > m_an[1] && m_an[2] > m_an[3]) { int n = m_an[0]; m_an[0] = m_an[1]; m_an[1] = n; n = m_an[2]; m_an[2] = m_an[3]; m_an[3] = n; } SetEnvelop(); m_anLength[0] = m_ai[1] - m_ai[0] + 1; m_anLength[1] = m_ai[3] - m_ai[2] + 1;}CHit::CHit(const CSeq_align& sa){ x_InitDefaults(); if (sa.CheckNumRows() != 2) { NCBI_THROW(CAlgoAlignException, eBadParameter, "CHit::CHit passed seq-align of dimension != 2"); } m_Query = sa.GetSeq_id(0).GetSeqIdString(true); m_Subj = sa.GetSeq_id(1).GetSeqIdString(true); m_an[0] = sa.GetSeqStart(0) + 1; m_an[1] = sa.GetSeqStop(0) + 1; m_an[2] = sa.GetSeqStart(1) + 1; m_an[3] = sa.GetSeqStop(1) + 1; if (!sa.GetSegs().IsDenseg()) { NCBI_THROW(CAlgoAlignException, eBadParameter, "CHit::CHit passed a seq-align that isn't a dense-seg."); } const CDense_seg &ds = sa.GetSegs().GetDenseg(); const CDense_seg::TStrands& strands = ds.GetStrands(); const CDense_seg::TLens& lens = ds.GetLens(); bool strand_query; if(strands[0] == eNa_strand_plus) { strand_query = true; } else if(strands[0] == eNa_strand_minus) { strand_query = false; } else { NCBI_THROW(CAlgoAlignException, eBadParameter, "Unexpected query strand reported to CHit::CHit(CSeq_align)."); } bool strand_subj; if( strands[1] == eNa_strand_plus) { strand_subj = true; } else if(strands[1] == eNa_strand_minus) { strand_subj = false; } else { NCBI_THROW(CAlgoAlignException, eBadParameter, "Unexpected subject strand reported to CHit::CHit(CSeq_align)."); } if (strand_query != strand_subj) { swap(m_an[2], m_an[3]); } sa.GetNamedScore("num_ident", m_Idnty); sa.GetNamedScore("bit_score", m_Score); sa.GetNamedScore("sum_e", m_Value); SetEnvelop(); m_anLength[0] = m_ai[1] - m_ai[0] + 1; m_anLength[1] = m_ai[3] - m_ai[2] + 1; m_Length = accumulate(lens.begin(), lens.end(), 0); m_Idnty = 100 * m_Idnty / m_Length; //convert # identities to % identity
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -