alnmix.cpp
来自「ncbi源码」· C++ 代码 · 共 1,672 行 · 第 1/5 页
CPP
1,672 行
/* * =========================================================================== * PRODUCTION $Log: alnmix.cpp,v $ * PRODUCTION Revision 1000.5 2004/06/01 19:40:45 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.95 * PRODUCTION * =========================================================================== *//* $Id: alnmix.cpp,v 1000.5 2004/06/01 19:40:45 gouriano Exp $* ===========================================================================** PUBLIC DOMAIN NOTICE* National Center for Biotechnology Information** This software/database is a "United States Government Work" under the* terms of the United States Copyright Act. It was written as part of* the author's official duties as a United States Government employee and* thus cannot be copyrighted. This software/database is freely available* to the public for use. The National Library of Medicine and the U.S.* Government have not placed any restriction on its use or reproduction.** Although all reasonable efforts have been taken to ensure the accuracy* and reliability of the software and data, the NLM and the U.S.* Government do not and cannot warrant the performance or results that* may be obtained by using this software or data. The NLM and the U.S.* Government disclaim all warranties, express or implied, including* warranties of performance, merchantability or fitness for any particular* purpose.** Please cite the author in any work or product based on this material.** ===========================================================================** Author: Kamen Todorov, NCBI** File Description:* Alignment merger** ===========================================================================*/#include <ncbi_pch.hpp>#include <objtools/alnmgr/alnmix.hpp>#include <objects/seqalign/Seq_align_set.hpp>#include <objects/seq/Bioseq.hpp>#include <objects/seqloc/Seq_id.hpp>// Object Manager includes#include <objmgr/scope.hpp>#include <algorithm>BEGIN_NCBI_SCOPEBEGIN_objects_SCOPE // namespace ncbi::objects::CAlnMix::CAlnMix(void) : m_MergeFlags(0), m_SingleRefseq(false), m_ContainsAA(false), m_ContainsNA(false){}CAlnMix::CAlnMix(CScope& scope) : m_Scope(&scope), m_MergeFlags(0), m_SingleRefseq(false), m_ContainsAA(false), m_ContainsNA(false){}CAlnMix::~CAlnMix(void){}inlinebool CAlnMix::x_CompareAlnSeqScores(const CAlnMixSeq* aln_seq1, const CAlnMixSeq* aln_seq2) { return aln_seq1->m_Score > aln_seq2->m_Score;}inlinebool CAlnMix::x_CompareAlnMatchScores(const CRef<CAlnMixMatch>& aln_match1, const CRef<CAlnMixMatch>& aln_match2) { return aln_match1->m_Score > aln_match2->m_Score;}void CAlnMix::x_Reset(){ if (m_DS) { m_DS.Reset(); } if (m_Aln) { m_Aln.Reset(); } m_Segments.clear(); m_Rows.clear(); m_ExtraRows.clear(); ITERATE (TSeqs, seq_i, m_Seqs) { (*seq_i)->m_Starts.clear(); (*seq_i)->m_ExtraRow = 0; }}void CAlnMix::Merge(TMergeFlags flags){ if (m_InputDSs.empty()) { NCBI_THROW(CAlnException, eMergeFailure, "CAlnMix::Merge(): " "No alignments were added for merging."); } if ( !m_DS || m_MergeFlags != flags) { x_Reset(); m_MergeFlags = flags; if (m_MergeFlags & fTryOtherMethodOnFail) { try { x_Merge(); } catch(...) { if (m_MergeFlags & fGen2EST) { m_MergeFlags &= !fGen2EST; } else { m_MergeFlags |= fGen2EST; } try { x_Merge(); } catch(...) { NCBI_THROW(CAlnException, eUnknownMergeFailure, "CAlnMix::x_Merge(): " "Both Gen2EST and Nucl2Nucl " "merges failed."); } } } else { x_Merge(); } }}void CAlnMix::Add(const CSeq_align& aln, TAddFlags flags){ if (m_InputAlnsMap.find((void *)&aln) == m_InputAlnsMap.end()) { // add only if not already added m_InputAlnsMap[(void *)&aln] = &aln; m_InputAlns.push_back(CConstRef<CSeq_align>(&aln)); if (aln.GetSegs().IsDenseg()) { Add(aln.GetSegs().GetDenseg(), flags); } else if (aln.GetSegs().IsStd()) { CRef<CSeq_align> sa = aln.CreateDensegFromStdseg(this); Add(*sa, flags); } else if (aln.GetSegs().IsDisc()) { ITERATE (CSeq_align_set::Tdata, aln_it, aln.GetSegs().GetDisc().Get()) { Add(**aln_it, flags); } } }}void CAlnMix::Add(const CDense_seg &ds, TAddFlags flags){ const CDense_seg* dsp = &ds; if (m_InputDSsMap.find((void *)dsp) != m_InputDSsMap.end()) { return; // it has already been added } x_Reset();#if _DEBUG dsp->Validate(true);#endif m_InputDSsMap[(void *)dsp] = dsp; // check if scope was given if ( !(flags & fDontUseObjMgr || m_Scope != 0) ) { NCBI_THROW(CAlnException, eMergeFailure, "CAlnMix::Add(): " "AlnMix will not create a scope for you. " "Either create one in advance and provide reference " "through CAlnMix constructor or use fDontUseObjMgr flag."); } // translate (extend with widths) the dense-seg if necessary if (flags & fForceTranslation && !dsp->IsSetWidths()) { if (flags & fDontUseObjMgr) { string errstr = string("CAlnMix::Add(): ") + "Cannot force translation for Dense_seg " + NStr::IntToString(m_InputDSs.size() + 1) + ". " + "Neither CDense_seg::m_Widths are supplied, " + "nor OM is used to identify molecule type."; NCBI_THROW(CAlnException, eMergeFailure, errstr); } else { m_InputDSs.push_back(x_ExtendDSWithWidths(*dsp)); dsp = m_InputDSs.back(); } } else { m_InputDSs.push_back(CConstRef<CDense_seg>(dsp)); } if (flags & fDontUseObjMgr && flags & fCalcScore) { NCBI_THROW(CAlnException, eMergeFailure, "CAlnMix::Add(): " "fCalcScore cannot be used together with fDontUseObjMgr"); } m_AddFlags = flags; int ds_index = m_InputDSs.size(); vector<CRef<CAlnMixSeq> > ds_seq; // check the widths if (dsp->IsSetWidths()) { if (dsp->GetWidths().size() != (size_t) dsp->GetDim()) { string errstr = string("CAlnMix::Add(): ") + "Dense-seg " + NStr::IntToString(ds_index) + " has incorrect widths size (" + NStr::IntToString(dsp->GetWidths().size()) + "). Should be equal to its dim (" + NStr::IntToString(dsp->GetDim()) + ")."; NCBI_THROW(CAlnException, eMergeFailure, errstr); } } //store the seqs for (CAlnMap::TNumrow row = 0; row < dsp->GetDim(); row++) { CRef<CAlnMixSeq> aln_seq; if (m_AddFlags & fDontUseObjMgr) { // identify sequences by their seq ids as provided by // the dense seg (not as reliable as with OM, but faster) CRef<CSeq_id> seq_id(new CSeq_id); seq_id->Assign(*dsp->GetIds()[row]); TSeqIdMap::iterator it = m_SeqIds.find(seq_id); if (it == m_SeqIds.end()) { // add this seq id aln_seq = new CAlnMixSeq(); m_SeqIds[seq_id] = aln_seq; aln_seq->m_SeqId = seq_id; aln_seq->m_DS_Count = 0; // add this sequence m_Seqs.push_back(aln_seq); // AA or NA? if (dsp->IsSetWidths()) { if (dsp->GetWidths()[row] == 1) { aln_seq->m_IsAA = true; m_ContainsAA = true; } else { m_ContainsNA = true; } } } else { aln_seq = it->second; } } else { // uniquely identify the bioseq x_IdentifyAlnMixSeq(aln_seq, *(ds.GetIds())[row]);#if _DEBUG // Verify the widths (if exist) if (dsp->IsSetWidths()) { const int& width = dsp->GetWidths()[row]; if (width == 1 && aln_seq->m_IsAA != true || width == 3 && aln_seq->m_IsAA != false) { string errstr = string("CAlnMix::Add(): ") + "Incorrect width(" + NStr::IntToString(width) + ") or molecule type(" + (aln_seq->m_IsAA ? "AA" : "NA") + ")."; NCBI_THROW(CAlnException, eInvalidSegment, errstr); } }#endif } aln_seq->m_DS_Count++; ds_seq.push_back(aln_seq); } //record all alignment relations int seg_off = 0; TSignedSeqPos start1, start2; TSeqPos len; bool single_chunk; CAlnMap::TNumrow first_non_gapped_row_found; bool strands_exist = dsp->GetStrands().size() == (size_t)dsp->GetNumseg() * dsp->GetDim(); for (CAlnMap::TNumseg seg =0; seg < dsp->GetNumseg(); seg++) { len = dsp->GetLens()[seg]; single_chunk = true; for (CAlnMap::TNumrow row1 = 0; row1 < dsp->GetDim(); row1++) { if ((start1 = dsp->GetStarts()[seg_off + row1]) >= 0) { //search for a match for the piece of seq on row1 CRef<CAlnMixSeq> aln_seq1 = ds_seq[row1]; for (CAlnMap::TNumrow row2 = row1+1; row2 < dsp->GetDim(); row2++) { if ((start2 = dsp->GetStarts()[seg_off + row2]) >= 0) { //match found if (single_chunk) { single_chunk = false; first_non_gapped_row_found = row1; } //create the match CRef<CAlnMixMatch> match(new CAlnMixMatch); //add only pairs with the first_non_gapped_row_found //still, calc the score to be added to the seqs' scores if (row1 == first_non_gapped_row_found) { m_Matches.push_back(match); }
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?