alignment_smear.cpp
来自「ncbi源码」· C++ 代码 · 共 367 行
CPP
367 行
/* * =========================================================================== * PRODUCTION $Log: alignment_smear.cpp,v $ * PRODUCTION Revision 1000.0 2004/06/01 21:19:53 gouriano * PRODUCTION PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.2 * PRODUCTION * =========================================================================== *//* $Id: alignment_smear.cpp,v 1000.0 2004/06/01 21:19:53 gouriano Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Authors: Robert Smith * * File Description: * */#include <ncbi_pch.hpp>#include <gui/objutils/alignment_smear.hpp>#include <objtools/alnmgr/alnmap.hpp>#include <objmgr/bioseq_handle.hpp>#include <objmgr/annot_selector.hpp>#include <objects/seq/Seq_annot.hpp>#include <objects/seq/Annot_descr.hpp>#include <objects/seq/Annotdesc.hpp>#include <objects/general/User_object.hpp>#include <objects/general/User_field.hpp>#include <objects/general/Object_id.hpp>BEGIN_NCBI_SCOPEUSING_SCOPE(objects);CAlignmentSmear::CAlignmentSmear( const objects::CBioseq_Handle& handle, TSeqPos start, TSeqPos stop, TSeqPos window, EAlignSmearStrand strand_type ) : m_BioseqHandle(handle), m_AccumSeg(start, stop, window), m_AccumGap(start, stop, window), m_StrandType(strand_type){}/// smear all the alignments in this annotation.void CAlignmentSmear::AddAnnot(const CSeq_annot& seq_annot){ objects::SAnnotSelector sel; sel.SetLimitSeqAnnot(&seq_annot) .SetSortOrder(SAnnotSelector::eSortOrder_None); AddAlignments(sel); // now if there is a chance we would put more than one annotation // in a smear we need to join their names together here, or something else? SetLabel(x_GetAnnotName(seq_annot));}#if 1/// Smear all the alignments matchec by this selector on my bioseq.void CAlignmentSmear::AddAlignments(const objects::SAnnotSelector& sel){ TSeqPos start(m_AccumSeg.GetStart()); TSeqPos stop(m_AccumSeg.GetStop()); // grab a alignment iterator for our range CAlign_CI align_iter(m_BioseqHandle, start, stop, sel); for (int ai = 0; align_iter; ++align_iter, ++ai) { const CSeq_align& align = *align_iter; // convert to an AlnMap CRef< CAlnMap> aln_map; if (align.GetSegs().IsStd()) { continue; // TODO: Make StdSeqs work. CRef<CSeq_align> ds_align = align.CreateDensegFromStdseg(); aln_map = new CAlnMap( ds_align->GetSegs().GetDenseg()); } else if (align.GetSegs().IsDenseg()) { aln_map = new CAlnMap(align.GetSegs().GetDenseg()); } AddAlignment(*aln_map); } MaskGaps();}#else/* same thing but we group alignments by their id *//// Smear all the alignments matchec by this selector on my bioseq.void CAlignmentSmear::AddAlignments(const objects::SAnnotSelector& sel){ TSeqPos start(m_AccumSeg.GetStart()); TSeqPos stop(m_AccumSeg.GetStop()); typedef map<string, CRef<CAlnMix> > TMixes; TMixes mixes; // grab a alignment iterator for our range CAlign_CI align_iter(m_BioseqHandle, start, stop, sel); for (int ai = 0; align_iter; ++align_iter, ++ai) { const CSeq_align& align = *align_iter; // convert to an AlnMap CRef< CAlnMap> aln_map; if (align.GetSegs().IsStd()) { CRef<CSeq_align> ds_align = align.CreateDensegFromStdseg(); aln_map = new CAlnMap( ds_align->GetSegs().GetDenseg()); } else if (align.GetSegs().IsDenseg()) { aln_map = new CAlnMap(align.GetSegs().GetDenseg()); } // which row of the alignment is the other sequence on? CAlnMap::TNumrow row = m_BioseqHandle.IsSynonym( aln_map->GetSeqId(0)) ? 1 : 0; // What is that sequences label? string align_label; aln_map->GetSeqId(row).GetLabel(&align_label); // Mix the alignments based on their label. TMixes::const_iterator mit = mixes.find(align_label); if (mit == mixes.end()) { CRef<CAlnMix> mix_ref(new CAlnMix(m_BioseqHandle.GetScope())); mixes[align_label] = mix_ref; } mixes[align_label]->Add(aln_map->GetDenseg()); } // Now smear the mixed alignments. NON_CONST_ITERATE(TMixes, mit, mixes) { CAlnMix & aln_mix(*(mit->second)); aln_mix.Merge(CAlnMix::fGen2EST | CAlnMix::fTryOtherMethodOnFail | CAlnMix::fGapJoin); // convert to a CAlnMap CAlnMap aln_map(aln_mix.GetDenseg()); AddAlignment(aln_map); } // take out gaps in the middle of blocks. MaskGaps();}#endifvoid CAlignmentSmear::AddAlignment(CAlnMap& aln_map){ // check number of dimensions if ( aln_map.GetNumRows() != 2) { // cout << "alignment does not have 2 rows." << endl; return; } // find out what row our bioseq is on. CAlnMap::TNumrow bs_row = m_BioseqHandle.IsSynonym( aln_map.GetSeqId(0)) ? 0 : 1; _ASSERT( bs_row == 0 || bs_row == 1 ); CAlnMap::TNumrow other_row = 1 - bs_row; // works since bs_row is always 0 or 1. // Are we only doing one strand? if (m_StrandType == eSmearStrand_Pos && aln_map.IsNegativeStrand(bs_row) ) return; if (m_StrandType == eSmearStrand_Neg && aln_map.IsPositiveStrand(bs_row) ) return; CAlnMap::TSignedRange range(aln_map.GetAlnStart(), aln_map.GetAlnStop()); CRef<CAlnMap::CAlnChunkVec> aln_chunks (aln_map.GetAlnChunks(other_row, range, CAlnMap::fSeqOnly )); // | CAlnMap::fChunkSameAsSeg)); // Smear all the segments. for (size_t i = 0; i < aln_chunks->size(); ++i) { CConstRef<CAlnMap::CAlnChunk> chunk((*aln_chunks)[i]); TSeqPos start = chunk->GetRange().GetFrom(); start = aln_map.GetSeqPosFromSeqPos(bs_row, other_row, start); TSeqPos stop = chunk->GetRange().GetTo(); stop = aln_map.GetSeqPosFromSeqPos(bs_row, other_row, stop); m_AccumSeg.AddRange(TSeqRange(start, stop), 1); } // Smear the gaps. for (size_t i = 1; i < aln_chunks->size(); ++i) { CConstRef<CAlnMap::CAlnChunk> prev_chunk((*aln_chunks)[i-1]); CConstRef<CAlnMap::CAlnChunk> chunk((*aln_chunks)[i]); TSeqPos start = prev_chunk->GetRange().GetTo(); start = aln_map.GetSeqPosFromSeqPos(bs_row, other_row, start); TSeqPos stop = chunk->GetRange().GetFrom(); stop = aln_map.GetSeqPosFromSeqPos(bs_row, other_row, stop); m_AccumGap.AddRange(TSeqRange(start, stop)); }}// erase gaps where there are segments.void CAlignmentSmear::MaskGaps(){ TSegMap::iterator seg_it = m_AccumSeg.begin(); TGapMap::iterator gap_it = m_AccumGap.begin(); for (; gap_it != m_AccumGap.end(); ++gap_it, ++seg_it ) { if (*gap_it > 0 && *seg_it > 0) { *gap_it = 0; } } }string CAlignmentSmear::GetLabel() const{ return m_Label;}void CAlignmentSmear::SetLabel(const string& label){ m_Label = label;}bool CAlignmentSmear::SeparateStrands(const objects::CSeq_annot& seq_annot){ string name = x_GetAnnotName(seq_annot); if (name == "BLASTX - swissprot" || name == "BLASTN - mrna" ) { return true; } return false;}static const string s_BlastFieldKey("Blast Type");static const string s_HistFieldKey("Hist Seqalign");string CAlignmentSmear::x_GetAnnotName(const objects::CSeq_annot& seq_annot){ string label; string annot_name; string blast_type; string hist_type; if (seq_annot.IsSetDesc()) { ITERATE ( CSeq_annot::TDesc::Tdata, it, seq_annot.GetDesc().Get() ) { const CAnnotdesc& desc = **it; if (desc.Which() == CAnnotdesc::e_Name ) { annot_name = desc.GetName(); } if (desc.Which() == CAnnotdesc::e_User) { const CUser_object& user_obj = desc.GetUser(); if (user_obj.CanGetType() && user_obj.GetType().Which() == CObject_id::e_Str ) { if (user_obj.GetType().GetStr() == s_BlastFieldKey) { const CUser_field& user_field = * user_obj.GetData().front(); if (user_field.CanGetData() && user_field.GetData().Which() == CUser_field::TData::e_Int && user_field.CanGetLabel()) { const CObject_id& id = user_field.GetLabel(); if (id.Which() == CObject_id::e_Str ) { blast_type = id.GetStr(); } } } if (user_obj.GetType().GetStr() == s_HistFieldKey ) { const CUser_field& user_field = * user_obj.GetData().front(); if (user_field.CanGetData() && user_field.GetData().Which() == CUser_field::TData::e_Bool && user_field.GetData().GetBool() && user_field.CanGetLabel()) { const CObject_id& id = user_field.GetLabel(); if (id.Which() == CObject_id::e_Str ) { hist_type = id.GetStr(); } } } } } } if ( ! annot_name.empty()) { label = annot_name; } else if ( ! blast_type.empty() ) { label = blast_type; } else if ( ! hist_type.empty() ) { label = hist_type; } } return label;}END_NCBI_SCOPE/* * =========================================================================== * $Log: alignment_smear.cpp,v $ * Revision 1000.0 2004/06/01 21:19:53 gouriano * PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.2 * * Revision 1.2 2004/05/21 22:27:43 gorelenk * Added PCH ncbi_pch.hpp * * Revision 1.1 2004/04/30 11:48:15 dicuccio * Initial commit - split out from src/gui/utils * * Revision 1.2 2004/04/07 15:22:39 rsmith * new (not yet used) implementation of AddAlignments * * Revision 1.1 2004/03/22 16:40:55 rsmith * initial checkin * * * =========================================================================== */
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?