alignment_smear.cpp

来自「ncbi源码」· C++ 代码 · 共 367 行

CPP
367
字号
/* * =========================================================================== * PRODUCTION $Log: alignment_smear.cpp,v $ * PRODUCTION Revision 1000.0  2004/06/01 21:19:53  gouriano * PRODUCTION PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.2 * PRODUCTION * =========================================================================== *//*  $Id: alignment_smear.cpp,v 1000.0 2004/06/01 21:19:53 gouriano Exp $ * =========================================================================== * *                            PUBLIC DOMAIN NOTICE *               National Center for Biotechnology Information * *  This software/database is a "United States Government Work" under the *  terms of the United States Copyright Act.  It was written as part of *  the author's official duties as a United States Government employee and *  thus cannot be copyrighted.  This software/database is freely available *  to the public for use. The National Library of Medicine and the U.S. *  Government have not placed any restriction on its use or reproduction. * *  Although all reasonable efforts have been taken to ensure the accuracy *  and reliability of the software and data, the NLM and the U.S. *  Government do not and cannot warrant the performance or results that *  may be obtained by using this software or data. The NLM and the U.S. *  Government disclaim all warranties, express or implied, including *  warranties of performance, merchantability or fitness for any particular *  purpose. * *  Please cite the author in any work or product based on this material. * * =========================================================================== * * Authors: Robert Smith * * File Description: * */#include <ncbi_pch.hpp>#include <gui/objutils/alignment_smear.hpp>#include <objtools/alnmgr/alnmap.hpp>#include <objmgr/bioseq_handle.hpp>#include <objmgr/annot_selector.hpp>#include <objects/seq/Seq_annot.hpp>#include <objects/seq/Annot_descr.hpp>#include <objects/seq/Annotdesc.hpp>#include <objects/general/User_object.hpp>#include <objects/general/User_field.hpp>#include <objects/general/Object_id.hpp>BEGIN_NCBI_SCOPEUSING_SCOPE(objects);CAlignmentSmear::CAlignmentSmear(        const objects::CBioseq_Handle& handle,         TSeqPos start,         TSeqPos stop,         TSeqPos window,        EAlignSmearStrand strand_type    )    : m_BioseqHandle(handle),    m_AccumSeg(start, stop, window),    m_AccumGap(start, stop, window),    m_StrandType(strand_type){}/// smear all the alignments in this annotation.void CAlignmentSmear::AddAnnot(const CSeq_annot& seq_annot){    objects::SAnnotSelector sel;    sel.SetLimitSeqAnnot(&seq_annot)       .SetSortOrder(SAnnotSelector::eSortOrder_None);    AddAlignments(sel);        // now if there is a chance we would put more than one annotation    // in a smear we need to join their names together here, or something else?    SetLabel(x_GetAnnotName(seq_annot));}#if 1/// Smear all the alignments matchec by this selector on my bioseq.void CAlignmentSmear::AddAlignments(const objects::SAnnotSelector& sel){    TSeqPos start(m_AccumSeg.GetStart());    TSeqPos stop(m_AccumSeg.GetStop());             // grab a alignment iterator for our range    CAlign_CI align_iter(m_BioseqHandle, start, stop, sel);    for (int ai = 0; align_iter; ++align_iter, ++ai) {        const CSeq_align& align = *align_iter;                // convert to an AlnMap        CRef< CAlnMap> aln_map;        if (align.GetSegs().IsStd()) {            continue;   // TODO: Make  StdSeqs work.            CRef<CSeq_align> ds_align = align.CreateDensegFromStdseg();            aln_map =  new CAlnMap( ds_align->GetSegs().GetDenseg());        } else if (align.GetSegs().IsDenseg()) {            aln_map = new CAlnMap(align.GetSegs().GetDenseg());        }        AddAlignment(*aln_map);    }    MaskGaps();}#else/* same thing but we group alignments by their id *//// Smear all the alignments matchec by this selector on my bioseq.void CAlignmentSmear::AddAlignments(const objects::SAnnotSelector& sel){    TSeqPos start(m_AccumSeg.GetStart());    TSeqPos stop(m_AccumSeg.GetStop());          typedef map<string, CRef<CAlnMix> > TMixes;    TMixes  mixes;       // grab a alignment iterator for our range    CAlign_CI align_iter(m_BioseqHandle, start, stop, sel);    for (int ai = 0; align_iter; ++align_iter, ++ai) {        const CSeq_align& align = *align_iter;        // convert to an AlnMap        CRef< CAlnMap> aln_map;        if (align.GetSegs().IsStd()) {            CRef<CSeq_align> ds_align = align.CreateDensegFromStdseg();            aln_map =  new CAlnMap( ds_align->GetSegs().GetDenseg());        } else if (align.GetSegs().IsDenseg()) {            aln_map = new CAlnMap(align.GetSegs().GetDenseg());        }        // which row of the alignment is the other sequence on?        CAlnMap::TNumrow row = m_BioseqHandle.IsSynonym(                             aln_map->GetSeqId(0)) ? 1 : 0;                                    // What is that sequences label?        string  align_label;        aln_map->GetSeqId(row).GetLabel(&align_label);                // Mix the alignments based on their label.        TMixes::const_iterator mit = mixes.find(align_label);        if (mit == mixes.end()) {            CRef<CAlnMix> mix_ref(new CAlnMix(m_BioseqHandle.GetScope()));            mixes[align_label] = mix_ref;        }         mixes[align_label]->Add(aln_map->GetDenseg());    }        // Now smear the mixed alignments.    NON_CONST_ITERATE(TMixes, mit, mixes)    {        CAlnMix & aln_mix(*(mit->second));        aln_mix.Merge(CAlnMix::fGen2EST |                          CAlnMix::fTryOtherMethodOnFail |                          CAlnMix::fGapJoin);                 // convert to a CAlnMap        CAlnMap aln_map(aln_mix.GetDenseg());        AddAlignment(aln_map);    }        // take out gaps in the middle of blocks.    MaskGaps();}#endifvoid CAlignmentSmear::AddAlignment(CAlnMap& aln_map){    // check number of dimensions    if ( aln_map.GetNumRows() != 2) {        // cout << "alignment does not have 2 rows." << endl;        return;    }    // find out what row our bioseq is on.            CAlnMap::TNumrow bs_row = m_BioseqHandle.IsSynonym(                             aln_map.GetSeqId(0)) ? 0 : 1;    _ASSERT( bs_row == 0  ||  bs_row == 1 );    CAlnMap::TNumrow other_row = 1 - bs_row; // works since bs_row is always 0 or 1.    // Are we only doing one strand?    if (m_StrandType == eSmearStrand_Pos  && aln_map.IsNegativeStrand(bs_row) )        return;    if (m_StrandType == eSmearStrand_Neg  && aln_map.IsPositiveStrand(bs_row) )        return;        CAlnMap::TSignedRange range(aln_map.GetAlnStart(),                                aln_map.GetAlnStop());    CRef<CAlnMap::CAlnChunkVec> aln_chunks            (aln_map.GetAlnChunks(other_row, range,                       CAlnMap::fSeqOnly ));                        // | CAlnMap::fChunkSameAsSeg));                           // Smear all the segments.    for (size_t i = 0;  i < aln_chunks->size();  ++i) {        CConstRef<CAlnMap::CAlnChunk> chunk((*aln_chunks)[i]);                TSeqPos start = chunk->GetRange().GetFrom();        start = aln_map.GetSeqPosFromSeqPos(bs_row, other_row, start);        TSeqPos stop = chunk->GetRange().GetTo();        stop = aln_map.GetSeqPosFromSeqPos(bs_row, other_row, stop);                m_AccumSeg.AddRange(TSeqRange(start, stop), 1);    }        // Smear the gaps.    for (size_t i = 1;  i < aln_chunks->size();  ++i) {        CConstRef<CAlnMap::CAlnChunk> prev_chunk((*aln_chunks)[i-1]);        CConstRef<CAlnMap::CAlnChunk> chunk((*aln_chunks)[i]);        TSeqPos start = prev_chunk->GetRange().GetTo();        start = aln_map.GetSeqPosFromSeqPos(bs_row, other_row, start);        TSeqPos stop = chunk->GetRange().GetFrom();        stop = aln_map.GetSeqPosFromSeqPos(bs_row, other_row, stop);                m_AccumGap.AddRange(TSeqRange(start, stop));    }}// erase gaps where there are segments.void CAlignmentSmear::MaskGaps(){    TSegMap::iterator seg_it = m_AccumSeg.begin();    TGapMap::iterator gap_it = m_AccumGap.begin();    for (; gap_it != m_AccumGap.end(); ++gap_it, ++seg_it  ) {        if (*gap_it > 0   &&  *seg_it > 0) {            *gap_it = 0;        }    }    }string CAlignmentSmear::GetLabel() const{    return m_Label;}void CAlignmentSmear::SetLabel(const string& label){    m_Label = label;}bool CAlignmentSmear::SeparateStrands(const objects::CSeq_annot& seq_annot){    string name = x_GetAnnotName(seq_annot);        if (name == "BLASTX - swissprot"  ||         name == "BLASTN - mrna" ) {        return true;       }    return false;}static const string s_BlastFieldKey("Blast Type");static const string s_HistFieldKey("Hist Seqalign");string CAlignmentSmear::x_GetAnnotName(const objects::CSeq_annot& seq_annot){    string label;    string annot_name;    string blast_type;    string hist_type;        if (seq_annot.IsSetDesc()) {        ITERATE ( CSeq_annot::TDesc::Tdata, it,                  seq_annot.GetDesc().Get() ) {            const CAnnotdesc& desc = **it;            if (desc.Which() == CAnnotdesc::e_Name ) {                annot_name = desc.GetName();            }            if (desc.Which() == CAnnotdesc::e_User) {                const CUser_object& user_obj = desc.GetUser();                            if (user_obj.CanGetType()  &&                    user_obj.GetType().Which() == CObject_id::e_Str ) {                                    if (user_obj.GetType().GetStr()  ==  s_BlastFieldKey) {                        const CUser_field& user_field = * user_obj.GetData().front();                                                if (user_field.CanGetData()  &&                              user_field.GetData().Which() == CUser_field::TData::e_Int &&                            user_field.CanGetLabel()) {                            const CObject_id& id = user_field.GetLabel();                                                        if (id.Which() == CObject_id::e_Str ) {                                blast_type = id.GetStr();                            }                        }                    }                                    if (user_obj.GetType().GetStr()  ==  s_HistFieldKey  ) {                        const CUser_field& user_field = * user_obj.GetData().front();                        if (user_field.CanGetData()  &&                              user_field.GetData().Which() == CUser_field::TData::e_Bool &&                            user_field.GetData().GetBool()  &&                            user_field.CanGetLabel()) {                            const CObject_id& id = user_field.GetLabel();                            if (id.Which() == CObject_id::e_Str ) {                                hist_type = id.GetStr();                            }                        }                    }                }            }        }        if ( ! annot_name.empty()) {            label = annot_name;        } else if ( ! blast_type.empty() ) {            label = blast_type;        } else if ( ! hist_type.empty() ) {            label = hist_type;        }    }     return label;}END_NCBI_SCOPE/* * =========================================================================== * $Log: alignment_smear.cpp,v $ * Revision 1000.0  2004/06/01 21:19:53  gouriano * PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.2 * * Revision 1.2  2004/05/21 22:27:43  gorelenk * Added PCH ncbi_pch.hpp * * Revision 1.1  2004/04/30 11:48:15  dicuccio * Initial commit - split out from src/gui/utils * * Revision 1.2  2004/04/07 15:22:39  rsmith * new (not yet used) implementation of AddAlignments * * Revision 1.1  2004/03/22 16:40:55  rsmith * initial checkin * * * =========================================================================== */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?