split_parser.cpp
来自「ncbi源码」· C++ 代码 · 共 287 行
CPP
287 行
/*
 * ===========================================================================
 * PRODUCTION $Log: split_parser.cpp,v $
 * PRODUCTION Revision 1000.1  2004/06/01 19:41:49  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.6
 * PRODUCTION
 * ===========================================================================
 */
/*  $Id: split_parser.cpp,v 1000.1 2004/06/01 19:41:49 gouriano Exp $
 * ===========================================================================
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Author: Eugene Vasilchenko
 *
 * File Description: Methods to create object manager structures from ID2 spec
 *
 */

#include <ncbi_pch.hpp>
#include <objtools/data_loaders/genbank/split_parser.hpp>

#include <objmgr/objmgr_exception.hpp>

#include <objmgr/impl/tse_info.hpp>
#include <objmgr/impl/tse_chunk_info.hpp>
#include <objmgr/impl/seq_annot_info.hpp>

#include <objects/seqsplit/seqsplit__.hpp>

BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)


// Parse every chunk description listed in 'split' and attach the resulting
// chunk info object to 'tse'.
void CSplitParser::Attach(CTSE_Info& tse, const CID2S_Split_Info& split)
{
    ITERATE ( CID2S_Split_Info::TChunks, it, split.GetChunks() ) {
        CRef<CTSE_Chunk_Info> chunk = Parse(**it);
        chunk->x_TSEAttach(tse);
    }
}


// Build a CTSE_Chunk_Info for the chunk described by 'info'.
//
// The new object is created with info.GetId() and every content entry of
// 'info' is registered on it through the matching x_Attach() overload.
//
// Throws CLoaderException (eOtherError, "Unexpected split data") when a
// content entry is neither a Seq-annot nor a Seq-annot place description.
CRef<CTSE_Chunk_Info> CSplitParser::Parse(const CID2S_Chunk_Info& info)
{
    CRef<CTSE_Chunk_Info> ret(new CTSE_Chunk_Info(info.GetId()));
    ITERATE ( CID2S_Chunk_Info::TContent, it, info.GetContent() ) {
        const CID2S_Chunk_Content& content = **it;
        switch ( content.Which() ) {
        case CID2S_Chunk_Content::e_Seq_annot:
            x_Attach(*ret, content.GetSeq_annot());
            break;
        case CID2S_Chunk_Content::e_Seq_annot_place:
            x_Attach(*ret, content.GetSeq_annot_place());
            break;
        default:
            NCBI_THROW(CLoaderException, eOtherError,
                       "Unexpected split data");
        }
    }
    return ret;
}


// Register on 'chunk' every annotation place listed in 'place': first the
// entries of GetBioseqs() (as eBioseq), then the entries of
// GetBioseq_sets() (as eBioseq_set).
void CSplitParser::x_Attach(CTSE_Chunk_Info& chunk,
                            const CID2S_Seq_annot_place_Info& place)
{
    ITERATE ( CID2S_Seq_annot_place_Info::TBioseqs, it,
              place.GetBioseqs() ) {
        chunk.x_AddAnnotPlace(CTSE_Chunk_Info::eBioseq, *it);
    }
    ITERATE ( CID2S_Seq_annot_place_Info::TBioseq_sets, it,
              place.GetBioseq_sets() ) {
        chunk.x_AddAnnotPlace(CTSE_Chunk_Info::eBioseq_set, *it);
    }
}


// Register on 'chunk' the annotation types described by 'annot'.
//
// The annotation name is taken from 'annot' when it is set (otherwise the
// default-constructed CAnnotName is used) and the location set is parsed
// once from annot.GetSeq_loc().  The same name/location pair is then used
// for every registered type:
//   - alignments, when annot.IsSetAlign();
//   - graphs, when annot.IsSetGraph();
//   - for each feature entry: one selector per listed subtype when
//     subtypes are set, otherwise a single selector for the feature type.
void CSplitParser::x_Attach(CTSE_Chunk_Info& chunk,
                            const CID2S_Seq_annot_Info& annot)
{
    CAnnotName name;
    if ( annot.IsSetName() ) {
        name.SetNamed(annot.GetName());
    }

    TLocationSet loc;
    x_ParseLocation(loc, annot.GetSeq_loc());

    if ( annot.IsSetAlign() ) {
        SAnnotTypeSelector sel(CSeq_annot::TData::e_Align);
        chunk.x_AddAnnotType(name, sel, loc);
    }
    if ( annot.IsSetGraph() ) {
        SAnnotTypeSelector sel(CSeq_annot::TData::e_Graph);
        chunk.x_AddAnnotType(name, sel, loc);
    }

    ITERATE ( CID2S_Seq_annot_Info::TFeat, it, annot.GetFeat() ) {
        const CID2S_Feat_type_Info& type = **it;
        if ( type.IsSetSubtypes() ) {
            ITERATE ( CID2S_Feat_type_Info::TSubtypes, sit,
                      type.GetSubtypes() ) {
                // Unary '+' promotes the stored value to an integer before
                // it is converted to the subtype enumeration.
                SAnnotTypeSelector sel(CSeqFeatData::ESubtype(+*sit));
                chunk.x_AddAnnotType(name, sel, loc);
            }
        }
        else {
            SAnnotTypeSelector sel(CSeqFeatData::E_Choice(type.GetType()));
            chunk.x_AddAnnotType(name, sel, loc);
        }
    }
}


// Append to 'vec' a location covering the whole range of sequence 'id'.
inline
void CSplitParser::x_AddWhole(TLocationSet& vec,
                              const TLocationId& id)
{
    vec.push_back(TLocation(id, TLocationRange::GetWhole()));
}


// Append to 'vec' the closed interval [start, start+length-1] on
// sequence 'id'.
//
// NOTE(review): if TSeqPos is unsigned, length == 0 makes start+length-1
// wrap around (start == 0 gives the maximum value) - confirm that empty
// intervals never reach this function.
inline
void CSplitParser::x_AddInterval(TLocationSet& vec,
                                 const TLocationId& id,
                                 TSeqPos start, TSeqPos length)
{
    vec.push_back(TLocation(id, TLocationRange(start, start+length-1)));
}


// Flatten 'loc' into 'vec' as a list of (id, range) entries.
//
// Whole-sequence variants add one whole-range entry per gi, interval
// variants add one entry per interval, and a location set is expanded
// recursively.  Any other variant adds nothing.
void CSplitParser::x_ParseLocation(TLocationSet& vec,
                                   const CID2_Seq_loc& loc)
{
    switch ( loc.Which() ) {
    case CID2_Seq_loc::e_Gi_whole:
    {
        x_AddWhole(vec, loc.GetGi_whole());
        break;
    }

    case CID2_Seq_loc::e_Gi_whole_range:
    {
        // Consecutive gis: GetStart() .. GetStart()+GetCount()-1.
        const CID2_Id_Range& wr = loc.GetGi_whole_range();
        for ( int gi = wr.GetStart(), end = gi+wr.GetCount();
              gi < end; ++gi ) {
            x_AddWhole(vec, gi);
        }
        break;
    }

    case CID2_Seq_loc::e_Interval:
    {
        const CID2_Interval& interval = loc.GetInterval();
        x_AddInterval(vec,
                      interval.GetGi(),
                      interval.GetStart(), interval.GetLength());
        break;
    }

    case CID2_Seq_loc::e_Packed_ints:
    {
        // All packed intervals share the single gi stored in 'ints'.
        const CID2_Packed_Seq_ints& ints = loc.GetPacked_ints();
        ITERATE ( CID2_Packed_Seq_ints::TIntervals, it,
                  ints.GetIntervals() ) {
            const CID2_Seq_range& interval = **it;
            x_AddInterval(vec,
                          ints.GetGi(),
                          interval.GetStart(), interval.GetLength());
        }
        break;
    }

    case CID2_Seq_loc::e_Loc_set:
    {
        const CID2_Seq_loc::TLoc_set& loc_set = loc.GetLoc_set();
        ITERATE ( CID2_Seq_loc::TLoc_set, it, loc_set ) {
            x_ParseLocation(vec, **it);
        }
        break;
    }

    default:
        // Any other variant contributes no locations.
        break;
    }
}


// Load the data carried by 'id2_chunk' into 'chunk'.
//
// For every data entry the target place is derived from its id (a gi
// selects eBioseq, anything else is read as a Bioseq-set id) and each
// Seq-annot of the entry is wrapped into a CSeq_annot_Info and passed to
// chunk.x_LoadAnnot().
//
// Throws CLoaderException (eOtherError) when an entry carries descriptors,
// assembly, seq-map or seq-data.  The descriptor check runs before the
// annotations of the entry are loaded; the other three checks run after,
// so annotations processed before the throw stay loaded.
void CSplitParser::Load(CTSE_Chunk_Info& chunk,
                        const CID2S_Chunk& id2_chunk)
{
    ITERATE ( CID2S_Chunk::TData, dit, id2_chunk.GetData() ) {
        const CID2S_Chunk_Data& data = **dit;

        CTSE_Chunk_Info::TPlace place;
        if ( data.GetId().IsGi() ) {
            place.first = CTSE_Chunk_Info::eBioseq;
            place.second = data.GetId().GetGi();
        }
        else {
            place.first = CTSE_Chunk_Info::eBioseq_set;
            place.second = data.GetId().GetBioseq_set();
        }

        if ( !data.GetDescrs().empty() ) {
            NCBI_THROW(CLoaderException, eOtherError,
                       "split descr is not supported");
        }

        ITERATE ( CID2S_Chunk_Data::TAnnots, it, data.GetAnnots() ) {
            // The container yields const objects; constness is dropped
            // here so the annot can be handed to CSeq_annot_Info.
            CSeq_annot& annot = const_cast<CSeq_annot&>(**it);
            CRef<CSeq_annot_Info> annot_info(new CSeq_annot_Info(annot));
            chunk.x_LoadAnnot(place, annot_info);
        }

        if ( !data.GetAssembly().empty() ) {
            NCBI_THROW(CLoaderException, eOtherError,
                       "split assembly is not supported");
        }

        if ( !data.GetSeq_map().empty() ) {
            NCBI_THROW(CLoaderException, eOtherError,
                       "split seq-map is not supported");
        }

        if ( !data.GetSeq_data().empty() ) {
            NCBI_THROW(CLoaderException, eOtherError,
                       "split seq-data is not supported");
        }
    }
}


END_SCOPE(objects)
END_NCBI_SCOPE

/*
 * $Log: split_parser.cpp,v $
 * Revision 1000.1  2004/06/01 19:41:49  gouriano
 * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.6
 *
 * Revision 1.6  2004/05/21 21:42:52  gorelenk
 * Added PCH ncbi_pch.hpp
 *
 * Revision 1.5  2004/03/16 15:47:29  vasilche
 * Added CBioseq_set_Handle and set of EditHandles
 *
 * Revision 1.4  2004/02/17 21:19:35  vasilche
 * Fixed 'non-const reference to temporary' warnings.
 *
 * Revision 1.3  2004/01/28 20:53:42  vasilche
 * Added CSplitParser::Attach().
 *
 * Revision 1.2  2004/01/22 20:36:43  ucko
 * Correct path to seqsplit__.hpp.
 *
 * Revision 1.1  2004/01/22 20:10:35  vasilche
 * 1. Splitted ID2 specs to two parts.
 * ID2 now specifies only protocol.
 * Specification of ID2 split data is moved to seqsplit ASN module.
 * For now they are still reside in one resulting library as before - libid2.
 * As the result split specific headers are now in objects/seqsplit.
 * 2. Moved ID2 and ID1 specific code out of object manager.
 * Protocol is processed by corresponding readers.
 * ID2 split parsing is processed by ncbi_xreader library - used by all readers.
 * 3. Updated OBJMGR_LIBS correspondingly.
 *
 */
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?