alnmix.cpp

来自「ncbi源码」· C++ 代码 · 共 1,672 行 · 第 1/5 页

CPP
1,672
字号
/* * =========================================================================== * PRODUCTION $Log: alnmix.cpp,v $ * PRODUCTION Revision 1000.5  2004/06/01 19:40:45  gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.95 * PRODUCTION * =========================================================================== *//*  $Id: alnmix.cpp,v 1000.5 2004/06/01 19:40:45 gouriano Exp $* ===========================================================================**                            PUBLIC DOMAIN NOTICE*               National Center for Biotechnology Information**  This software/database is a "United States Government Work" under the*  terms of the United States Copyright Act.  It was written as part of*  the author's official duties as a United States Government employee and*  thus cannot be copyrighted.  This software/database is freely available*  to the public for use. The National Library of Medicine and the U.S.*  Government have not placed any restriction on its use or reproduction.**  Although all reasonable efforts have been taken to ensure the accuracy*  and reliability of the software and data, the NLM and the U.S.*  Government do not and cannot warrant the performance or results that*  may be obtained by using this software or data. The NLM and the U.S.*  Government disclaim all warranties, express or implied, including*  warranties of performance, merchantability or fitness for any particular*  purpose.**  Please cite the author in any work or product based on this material.** ===========================================================================** Author:  Kamen Todorov, NCBI** File Description:*   Alignment merger** ===========================================================================*/#include <ncbi_pch.hpp>#include <objtools/alnmgr/alnmix.hpp>#include <objects/seqalign/Seq_align_set.hpp>#include <objects/seq/Bioseq.hpp>#include <objects/seqloc/Seq_id.hpp>// Object Manager includes#include <objmgr/scope.hpp>#include <algorithm>BEGIN_NCBI_SCOPEBEGIN_objects_SCOPE // namespace ncbi::objects::CAlnMix::CAlnMix(void)    : m_MergeFlags(0),      m_SingleRefseq(false),      m_ContainsAA(false),      m_ContainsNA(false){}CAlnMix::CAlnMix(CScope& scope)    : m_Scope(&scope),      m_MergeFlags(0),      m_SingleRefseq(false),      m_ContainsAA(false),      m_ContainsNA(false){}CAlnMix::~CAlnMix(void){}inlinebool CAlnMix::x_CompareAlnSeqScores(const CAlnMixSeq* aln_seq1,                                    const CAlnMixSeq* aln_seq2) {    return aln_seq1->m_Score > aln_seq2->m_Score;}inlinebool CAlnMix::x_CompareAlnMatchScores(const CRef<CAlnMixMatch>& aln_match1,                                       const CRef<CAlnMixMatch>& aln_match2) {    return aln_match1->m_Score > aln_match2->m_Score;}void CAlnMix::x_Reset(){    if (m_DS) {        m_DS.Reset();    }    if (m_Aln) {        m_Aln.Reset();    }    m_Segments.clear();    m_Rows.clear();    m_ExtraRows.clear();    ITERATE (TSeqs, seq_i, m_Seqs) {        (*seq_i)->m_Starts.clear();        (*seq_i)->m_ExtraRow = 0;    }}void CAlnMix::Merge(TMergeFlags flags){    if (m_InputDSs.empty()) {        NCBI_THROW(CAlnException, eMergeFailure,                   "CAlnMix::Merge(): "                   "No alignments were added for merging.");    }    if ( !m_DS  ||  m_MergeFlags != flags) {        x_Reset();        m_MergeFlags = flags;        if (m_MergeFlags & fTryOtherMethodOnFail) {            try {                x_Merge();            } catch(...) {                if (m_MergeFlags & fGen2EST) {                    m_MergeFlags &= !fGen2EST;                } else {                    m_MergeFlags |= fGen2EST;                }                try {                    x_Merge();                } catch(...) {                    NCBI_THROW(CAlnException, eUnknownMergeFailure,                               "CAlnMix::x_Merge(): "                               "Both Gen2EST and Nucl2Nucl "                               "merges failed.");                }            }        } else {            x_Merge();        }    }}void CAlnMix::Add(const CSeq_align& aln, TAddFlags flags){    if (m_InputAlnsMap.find((void *)&aln) == m_InputAlnsMap.end()) {        // add only if not already added        m_InputAlnsMap[(void *)&aln] = &aln;        m_InputAlns.push_back(CConstRef<CSeq_align>(&aln));        if (aln.GetSegs().IsDenseg()) {            Add(aln.GetSegs().GetDenseg(), flags);        } else if (aln.GetSegs().IsStd()) {            CRef<CSeq_align> sa = aln.CreateDensegFromStdseg(this);            Add(*sa, flags);        } else if (aln.GetSegs().IsDisc()) {            ITERATE (CSeq_align_set::Tdata,                     aln_it,                     aln.GetSegs().GetDisc().Get()) {                Add(**aln_it, flags);            }        }    }}void CAlnMix::Add(const CDense_seg &ds, TAddFlags flags){    const CDense_seg* dsp = &ds;    if (m_InputDSsMap.find((void *)dsp) != m_InputDSsMap.end()) {        return; // it has already been added    }    x_Reset();#if _DEBUG    dsp->Validate(true);#endif        m_InputDSsMap[(void *)dsp] = dsp;    // check if scope was given    if ( !(flags & fDontUseObjMgr  ||  m_Scope != 0) ) {        NCBI_THROW(CAlnException, eMergeFailure,                    "CAlnMix::Add(): "                   "AlnMix will not create a scope for you. "                   "Either create one in advance and provide reference "                   "through CAlnMix constructor or use fDontUseObjMgr flag.");    }    // translate (extend with widths) the dense-seg if necessary    if (flags & fForceTranslation  &&  !dsp->IsSetWidths()) {        if (flags & fDontUseObjMgr) {            string errstr = string("CAlnMix::Add(): ")                 + "Cannot force translation for Dense_seg "                + NStr::IntToString(m_InputDSs.size() + 1) + ". "                + "Neither CDense_seg::m_Widths are supplied, "                + "nor OM is used to identify molecule type.";            NCBI_THROW(CAlnException, eMergeFailure, errstr);        } else {            m_InputDSs.push_back(x_ExtendDSWithWidths(*dsp));            dsp = m_InputDSs.back();        }    } else {        m_InputDSs.push_back(CConstRef<CDense_seg>(dsp));    }    if (flags & fDontUseObjMgr  &&  flags & fCalcScore) {        NCBI_THROW(CAlnException, eMergeFailure, "CAlnMix::Add(): "                   "fCalcScore cannot be used together with fDontUseObjMgr");    }    m_AddFlags = flags;    int ds_index = m_InputDSs.size();    vector<CRef<CAlnMixSeq> > ds_seq;    // check the widths    if (dsp->IsSetWidths()) {        if (dsp->GetWidths().size() != (size_t) dsp->GetDim()) {            string errstr = string("CAlnMix::Add(): ")                + "Dense-seg "                + NStr::IntToString(ds_index)                + " has incorrect widths size ("                + NStr::IntToString(dsp->GetWidths().size())                + "). Should be equal to its dim ("                + NStr::IntToString(dsp->GetDim()) + ").";            NCBI_THROW(CAlnException, eMergeFailure, errstr);        }    }    //store the seqs    for (CAlnMap::TNumrow row = 0;  row < dsp->GetDim();  row++) {        CRef<CAlnMixSeq> aln_seq;        if (m_AddFlags & fDontUseObjMgr) {            // identify sequences by their seq ids as provided by            // the dense seg (not as reliable as with OM, but faster)            CRef<CSeq_id> seq_id(new CSeq_id);            seq_id->Assign(*dsp->GetIds()[row]);            TSeqIdMap::iterator it = m_SeqIds.find(seq_id);            if (it == m_SeqIds.end()) {                // add this seq id                aln_seq = new CAlnMixSeq();                m_SeqIds[seq_id] = aln_seq;                aln_seq->m_SeqId = seq_id;                aln_seq->m_DS_Count = 0;                // add this sequence                m_Seqs.push_back(aln_seq);                            // AA or NA?                if (dsp->IsSetWidths()) {                    if (dsp->GetWidths()[row] == 1) {                        aln_seq->m_IsAA = true;                        m_ContainsAA = true;                    } else {                        m_ContainsNA = true;                    }                }                             } else {                aln_seq = it->second;            }                    } else {            // uniquely identify the bioseq            x_IdentifyAlnMixSeq(aln_seq, *(ds.GetIds())[row]);#if _DEBUG            // Verify the widths (if exist)            if (dsp->IsSetWidths()) {                const int& width = dsp->GetWidths()[row];                if (width == 1  &&  aln_seq->m_IsAA != true  ||                    width == 3  &&  aln_seq->m_IsAA != false) {                    string errstr = string("CAlnMix::Add(): ")                        + "Incorrect width("                         + NStr::IntToString(width) +                        ") or molecule type(" +                         (aln_seq->m_IsAA ? "AA" : "NA") +                        ").";                    NCBI_THROW(CAlnException, eInvalidSegment,                               errstr);                }            }#endif        }        aln_seq->m_DS_Count++;        ds_seq.push_back(aln_seq);    }    //record all alignment relations    int              seg_off = 0;    TSignedSeqPos    start1, start2;    TSeqPos          len;    bool             single_chunk;    CAlnMap::TNumrow first_non_gapped_row_found;    bool             strands_exist =         dsp->GetStrands().size() == (size_t)dsp->GetNumseg() * dsp->GetDim();    for (CAlnMap::TNumseg seg =0;  seg < dsp->GetNumseg();  seg++) {        len = dsp->GetLens()[seg];        single_chunk = true;        for (CAlnMap::TNumrow row1 = 0;  row1 < dsp->GetDim();  row1++) {            if ((start1 = dsp->GetStarts()[seg_off + row1]) >= 0) {                //search for a match for the piece of seq on row1                CRef<CAlnMixSeq> aln_seq1 = ds_seq[row1];                for (CAlnMap::TNumrow row2 = row1+1;                     row2 < dsp->GetDim();  row2++) {                    if ((start2 = dsp->GetStarts()[seg_off + row2]) >= 0) {                        //match found                        if (single_chunk) {                            single_chunk = false;                            first_non_gapped_row_found = row1;                        }                                                //create the match                        CRef<CAlnMixMatch> match(new CAlnMixMatch);                        //add only pairs with the first_non_gapped_row_found                        //still, calc the score to be added to the seqs' scores                        if (row1 == first_non_gapped_row_found) {                            m_Matches.push_back(match);                        }

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?