📄 reader.cpp
字号:
/* * =========================================================================== * PRODUCTION $Log: reader.cpp,v $ * PRODUCTION Revision 1000.1 2004/06/01 19:41:40 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.33 * PRODUCTION * =========================================================================== *//* $Id: reader.cpp,v 1000.1 2004/06/01 19:41:40 gouriano Exp $ * =========================================================================== * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * =========================================================================== * * Author: Anton Butanaev, Eugene Vasilchenko * * File Description: Base data reader interface * */#include <ncbi_pch.hpp>#include <objtools/data_loaders/genbank/reader.hpp>#include <serial/pack_string.hpp>#include <objmgr/annot_selector.hpp>#include <objmgr/objmgr_exception.hpp>#include <objmgr/impl/snp_annot_info.hpp>#include <objmgr/impl/tse_info.hpp>#include <objmgr/impl/tse_chunk_info.hpp>#include <objmgr/impl/seq_annot_info.hpp>#include <objmgr/impl/handle_range_map.hpp>#include <objects/general/Object_id.hpp>#include <objects/general/Dbtag.hpp>#include <objects/seqfeat/Seq_feat.hpp>#include <objects/seqfeat/Gb_qual.hpp>#include <objects/seqfeat/Imp_feat.hpp>#include <objects/seqset/Seq_entry.hpp>#include <objects/seqset/Bioseq_set.hpp>#include <serial/serial.hpp>#include <serial/objistr.hpp>#include <serial/objectinfo.hpp>#include <serial/objectiter.hpp>BEGIN_NCBI_SCOPEBEGIN_SCOPE(objects)static const char* const STRING_PACK_ENV = "GENBANK_SNP_PACK_STRINGS";static const char* const SNP_SPLIT_ENV = "GENBANK_SNP_SPLIT";static const char* const SNP_TABLE_ENV = "GENBANK_SNP_TABLE";static const char* const ENV_YES = "YES";CReader::CReader(void){}CReader::~CReader(void){}int CReader::GetConst(const string& ) const{ return 0;}bool CReader::s_GetEnvFlag(const char* env, bool def_val){ const char* val = ::getenv(env); if ( !val ) { return def_val; } string s(val); return s == "1" || NStr::CompareNocase(s, ENV_YES) == 0;}bool CReader::TrySNPSplit(void){ static bool snp_split = s_GetEnvFlag(SNP_SPLIT_ENV, true); return snp_split;}bool CReader::TrySNPTable(void){ static bool snp_table = s_GetEnvFlag(SNP_TABLE_ENV, true); return snp_table;}bool CReader::TryStringPack(void){ static bool use_string_pack = CPackString::TryStringPack() && s_GetEnvFlag(STRING_PACK_ENV, true); return use_string_pack;}void CReader::SetSNPReadHooks(CObjectIStream& in){ if ( !TryStringPack() ) { return; } CObjectTypeInfo type; type = CType<CGb_qual>(); type.FindMember("qual").SetLocalReadHook(in, new CPackStringClassHook); type.FindMember("val").SetLocalReadHook(in, new CPackStringClassHook(4, 128)); type = CObjectTypeInfo(CType<CImp_feat>()); type.FindMember("key").SetLocalReadHook(in, new CPackStringClassHook(32, 128)); type = CObjectTypeInfo(CType<CObject_id>()); type.FindVariant("str").SetLocalReadHook(in, new CPackStringChoiceHook); type = CObjectTypeInfo(CType<CDbtag>()); type.FindMember("db").SetLocalReadHook(in, new CPackStringClassHook); type = CObjectTypeInfo(CType<CSeq_feat>()); type.FindMember("comment").SetLocalReadHook(in, new CPackStringClassHook);}void CReader::SetSeqEntryReadHooks(CObjectIStream& in){ if ( !TryStringPack() ) { return; } CObjectTypeInfo type; type = CObjectTypeInfo(CType<CObject_id>()); type.FindVariant("str").SetLocalReadHook(in, new CPackStringChoiceHook); type = CObjectTypeInfo(CType<CImp_feat>()); type.FindMember("key").SetLocalReadHook(in, new CPackStringClassHook(32, 128)); type = CObjectTypeInfo(CType<CDbtag>()); type.FindMember("db").SetLocalReadHook(in, new CPackStringClassHook); type = CType<CGb_qual>(); type.FindMember("qual").SetLocalReadHook(in, new CPackStringClassHook);}bool CReader::IsSNPSeqref(const CSeqref& seqref){ return seqref.GetSat() == eSatellite_SNP;}void CReader::AddSNPSeqref(TSeqrefs& srs, int gi, CSeqref::TFlags flags){ flags |= CSeqref::fHasExternal; CRef<CSeqref> sr(new CSeqref(gi, eSatellite_SNP, gi)); sr->SetFlags(flags); srs.push_back(sr);}void CReader::ResolveSeq_id(TSeqrefs& srs, const CSeq_id& id, TConn conn){ int gi; if ( id.IsGi() ) { gi = id.GetGi(); } else { gi = ResolveSeq_id_to_gi(id, conn); } if ( gi ) { RetrieveSeqrefs(srs, gi, conn); }}void CReader::PurgeSeq_id_to_gi(const CSeq_id& /*id*/){}void CReader::PurgeSeqrefs(const TSeqrefs& /*srs*/, const CSeq_id& /*id*/){}CRef<CTSE_Info> CReader::GetBlob(const CSeqref& seqref, TConn conn, CTSE_Chunk_Info* chunk_info){ CRef<CTSE_Info> ret; if ( chunk_info ) { if ( IsSNPSeqref(seqref) && chunk_info->GetChunkId()==kSNP_ChunkId ) { GetSNPChunk(seqref, *chunk_info, conn); } else { GetTSEChunk(seqref, *chunk_info, conn); } } else { if ( IsSNPSeqref(seqref) ) { ret = GetSNPBlob(seqref, conn); } else { ret = GetTSEBlob(seqref, conn); } } return ret;}CRef<CTSE_Info> CReader::GetSNPBlob(const CSeqref& seqref, TConn /*conn*/){ _ASSERT(IsSNPSeqref(seqref)); CRef<CSeq_entry> seq_entry(new CSeq_entry); seq_entry->SetSet().SetSeq_set(); seq_entry->SetSet().SetId().SetId(kSNP_EntryId); // create CTSE_Info CRef<CTSE_Info> ret(new CTSE_Info(*seq_entry)); ret->SetName("SNP"); CRef<CTSE_Chunk_Info> info(new CTSE_Chunk_Info(kSNP_ChunkId)); info->x_AddAnnotPlace(CTSE_Chunk_Info::eBioseq_set, kSNP_EntryId); info->x_AddAnnotType(CAnnotName("SNP"), SAnnotTypeSelector(CSeqFeatData::eSubtype_variation), seqref.GetGi(), CTSE_Chunk_Info::TLocationRange::GetWhole()); info->x_TSEAttach(*ret); return ret;}void CReader::GetTSEChunk(const CSeqref& /*seqref*/, CTSE_Chunk_Info& /*chunk_info*/, TConn /*conn*/){ NCBI_THROW(CLoaderException, eNoData, "Chunks are not implemented");}void CReader::GetSNPChunk(const CSeqref& seqref, CTSE_Chunk_Info& chunk, TConn conn){ _ASSERT(IsSNPSeqref(seqref)); _ASSERT(chunk.GetChunkId() == kSNP_ChunkId); CRef<CSeq_annot_SNP_Info> snp_annot = GetSNPAnnot(seqref, conn); CRef<CSeq_annot_Info> annot_info(new CSeq_annot_Info(*snp_annot)); CTSE_Chunk_Info::TPlace place(CTSE_Chunk_Info::eBioseq_set, kSNP_EntryId); chunk.x_LoadAnnot(place, annot_info);}END_SCOPE(objects)END_NCBI_SCOPE/* * $Log: reader.cpp,v $ * Revision 1000.1 2004/06/01 19:41:40 gouriano * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.33 * * Revision 1.33 2004/05/21 21:42:52 gorelenk * Added PCH ncbi_pch.hpp * * Revision 1.32 2004/03/16 15:47:29 vasilche * Added CBioseq_set_Handle and set of EditHandles * * Revision 1.31 2004/02/18 14:01:25 dicuccio * Added new satellites for TRACE_ASSM, TR_ASSM_CH. Added support for overloading * the ID1 named service * * Revision 1.30 2004/02/17 21:18:53 vasilche * Fixed 'unused argument' warnings. * * Revision 1.29 2004/01/22 20:10:35 vasilche * 1. Splitted ID2 specs to two parts. * ID2 now specifies only protocol. * Specification of ID2 split data is moved to seqsplit ASN module. * For now they are still reside in one resulting library as before - libid2. * As the result split specific headers are now in objects/seqsplit. * 2. Moved ID2 and ID1 specific code out of object manager. * Protocol is processed by corresponding readers. * ID2 split parsing is processed by ncbi_xreader library - used by all readers. * 3. Updated OBJMGR_LIBS correspondingly. * * Revision 1.28 2004/01/13 16:55:55 vasilche * CReader, CSeqref and some more classes moved from xobjmgr to separate lib. * Headers moved from include/objmgr to include/objtools/data_loaders/genbank. * * Revision 1.27 2003/11/28 17:53:15 vasilche * Avoid calling CStreamUtils::Pushback() when constructing objects from text ASN. * * Revision 1.26 2003/11/26 17:55:58 vasilche * Implemented ID2 split in ID1 cache. * Fixed loading of splitted annotations. * * Revision 1.25 2003/10/27 15:05:41 vasilche * Added correct recovery of cached ID1 loader if gi->sat/satkey cache is invalid. * Added recognition of ID1 error codes: private, etc. * Some formatting of old code. * * Revision 1.24 2003/10/08 14:16:13 vasilche * Added version of blobs loaded from ID1. * * Revision 1.23 2003/10/07 13:43:23 vasilche * Added proper handling of named Seq-annots. * Added feature search from named Seq-annots. * Added configurable adaptive annotation search (default: gene, cds, mrna). * Fixed selection of blobs for loading from GenBank. * Added debug checks to CSeq_id_Mapper for easier finding lost CSeq_id_Handles. * Fixed leaked split chunks annotation stubs. * Moved some classes definitions in separate *.cpp files. * * Revision 1.22 2003/09/30 16:22:02 vasilche * Updated internal object manager classes to be able to load ID2 data. * SNP blobs are loaded as ID2 split blobs - readers convert them automatically. * Scope caches results of requests for data to data loaders. * Optimized CSeq_id_Handle for gis. * Optimized bioseq lookup in scope. * Reduced object allocations in annotation iterators. * CScope is allowed to be destroyed before other objects using this scope are * deleted (feature iterators, bioseq handles etc). * Optimized lookup for matching Seq-ids in CSeq_id_Mapper. * Added 'adaptive' option to objmgr_demo application. * * Revision 1.21 2003/08/27 14:25:22 vasilche * Simplified CCmpTSE class. * * Revision 1.20 2003/08/19 18:35:21 vasilche * CPackString classes were moved to SERIAL library. * * Revision 1.19 2003/08/14 20:05:19 vasilche * Simple SNP features are stored as table internally. * They are recreated when needed using CFeat_CI. * * Revision 1.18 2003/07/24 19:28:09 vasilche * Implemented SNP split for ID1 loader. * * Revision 1.17 2003/07/17 20:07:56 vasilche * Reduced memory usage by feature indexes. * SNP data is loaded separately through PUBSEQ_OS. * String compression for SNP data. * * Revision 1.16 2003/06/02 16:06:38 dicuccio * Rearranged src/objects/ subtree. This includes the following shifts: * - src/objects/asn2asn --> arc/app/asn2asn * - src/objects/testmedline --> src/objects/ncbimime/test * - src/objects/objmgr --> src/objmgr * - src/objects/util --> src/objmgr/util * - src/objects/alnmgr --> src/objtools/alnmgr * - src/objects/flat --> src/objtools/flat * - src/objects/validator --> src/objtools/validator * - src/objects/cddalignview --> src/objtools/cddalignview * In addition, libseq now includes six of the objects/seq... libs, and libmmdb * replaces the three libmmdb? libs. * * Revision 1.15 2003/04/24 16:12:38 vasilche * Object manager internal structures are splitted more straightforward. * Removed excessive header dependencies. * * Revision 1.14 2003/04/15 16:25:39 vasilche * Added initialization of int members. * * Revision 1.13 2003/04/15 14:24:08 vasilche * Changed CReader interface to not to use fake streams. * * Revision 1.12 2003/03/28 03:27:24 lavr * CIStream::Eof() conditional compilation removed; code reformatted * * Revision 1.11 2003/03/26 22:12:11 lavr * Revert CIStream::Eof() to destructive test * * Revision 1.10 2003/03/26 20:42:50 lavr * CIStream::Eof() made (temporarily) non-destructive w/o get() * * Revision 1.9 2003/02/26 18:02:39 vasilche * Added istream error check. * Avoid use of string::c_str() method. * * Revision 1.8 2003/02/25 22:03:44 vasilche * Fixed identation. * * Revision 1.7 2002/11/27 21:09:43 lavr * Take advantage of CStreamUtils::Readsome() in CIStream::Read() * CIStream::Eof() modified to use get() instead of operator>>() * * Revision 1.6 2002/05/06 03:28:47 vakatov * OM/OM1 renaming * * Revision 1.5 2002/03/27 20:23:50 butanaev * Added connection pool. * * Revision 1.4 2002/03/27 18:06:08 kimelman * stream.read/write instead of << >> * * Revision 1.3 2002/03/21 19:14:54 kimelman * GB related bugfixes * * Revision 1.2 2002/03/20 04:50:13 kimelman * GB loader added * * Revision 1.1 2002/01/11 19:06:21 gouriano * restructured objmgr * * Revision 1.6 2001/12/13 00:19:25 kimelman * bugfixes: * * Revision 1.5 2001/12/12 21:46:40 kimelman * Compare interface fix * * Revision 1.4 2001/12/10 20:08:01 butanaev * Code cleanup. * * Revision 1.3 2001/12/07 21:24:59 butanaev * Interface development, code beautyfication. * * Revision 1.2 2001/12/07 16:43:58 butanaev * Fixed includes. * * Revision 1.1 2001/12/07 16:10:22 butanaev * Switching to new reader interfaces. * * Revision 1.2 2001/12/06 18:06:22 butanaev * Ported to linux. * * Revision 1.1 2001/12/06 14:35:22 butanaev * New streamable interfaces designed, ID1 reimplemented. * */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -