reader_id1.cpp
来自「ncbi源码」· C++ 代码 · 共 1,248 行 · 第 1/3 页
CPP
1,248 行
/*
 * ===========================================================================
 * PRODUCTION $Log: reader_id1.cpp,v $
 * PRODUCTION Revision 1000.1 2004/06/01 19:41:56 gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.81
 * PRODUCTION
 * ===========================================================================
 */

/* $Id: reader_id1.cpp,v 1000.1 2004/06/01 19:41:56 gouriano Exp $
 * ===========================================================================
 * PUBLIC DOMAIN NOTICE
 * National Center for Biotechnology Information
 *
 * This software/database is a "United States Government Work" under the
 * terms of the United States Copyright Act. It was written as part of
 * the author's official duties as a United States Government employee and
 * thus cannot be copyrighted. This software/database is freely available
 * to the public for use. The National Library of Medicine and the U.S.
 * Government have not placed any restriction on its use or reproduction.
 *
 * Although all reasonable efforts have been taken to ensure the accuracy
 * and reliability of the software and data, the NLM and the U.S.
 * Government do not and cannot warrant the performance or results that
 * may be obtained by using this software or data. The NLM and the U.S.
 * Government disclaim all warranties, express or implied, including
 * warranties of performance, merchantability or fitness for any particular
 * purpose.
 *
 * Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Author: Anton Butanaev, Eugene Vasilchenko
 *
 * File Description: Data reader from ID1
 *
 */

#include <ncbi_pch.hpp>
#include <corelib/ncbiapp.hpp>
#include <corelib/ncbienv.hpp>

#include <objtools/data_loaders/genbank/readers/id1/reader_id1.hpp>

#include <objmgr/objmgr_exception.hpp>
#include <objmgr/impl/tse_info.hpp>
#include <objmgr/impl/tse_chunk_info.hpp>

#include <objtools/data_loaders/genbank/reader_snp.hpp>
#include <objtools/data_loaders/genbank/split_parser.hpp>

#include <corelib/ncbistre.hpp>

// Statistics collection is compiled in unconditionally by this definition;
// whether anything is actually reported is decided at run time
// (see CId1Reader::CollectStatistics()).
#define ID1_COLLECT_STATS

#ifdef ID1_COLLECT_STATS
# include <corelib/ncbitime.hpp>
#endif

#include <corelib/plugin_manager_impl.hpp>

#include <objects/general/Dbtag.hpp>
#include <objects/general/Object_id.hpp>
#include <objects/seqloc/Seq_id.hpp>
#include <objects/seqset/Seq_entry.hpp>
#include <objects/seqset/Bioseq_set.hpp>
#include <objects/seq/Seq_annot.hpp>
#include <objects/id1/id1__.hpp>
#include <objects/seqsplit/ID2S_Split_Info.hpp>

#include <serial/enumvalues.hpp>
#include <serial/iterator.hpp>
#include <serial/objistrasnb.hpp>
#include <serial/objostrasn.hpp>
#include <serial/objostrasnb.hpp>
#include <serial/serial.hpp>

#include <connect/ncbi_conn_stream.hpp>
#include <util/compress/reader_zlib.hpp>
#include <util/stream_utils.hpp>
#include <util/static_map.hpp>

#include <memory>
#include <iomanip>

BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)

#ifdef ID1_COLLECT_STATS
// File-local accumulators reported by CId1Reader::PrintStatistics().
// They are only read in the part of the file visible here; the code that
// updates them is presumably further down the file.
static int resolve_id_count = 0;
static double resolve_id_time = 0;

static int resolve_gi_count = 0;
static double resolve_gi_time = 0;

static int resolve_ver_count = 0;
static double resolve_ver_time = 0;

// NOTE(review): not referenced in the visible part of the file.
static double last_object_bytes = 0;

static int main_blob_count = 0;
static double main_bytes = 0;
static double main_time = 0;

static int snp_blob_count = 0;
static double snp_compressed = 0;
static double snp_uncompressed = 0;
static double snp_time = 0;
static double snp_total_read_time = 0;
static double snp_decompression_time = 0;


// Read the statistics level from the GENBANK_ID1_STATS environment variable.
// Returns 0 when the variable is unset or empty, or when
// NStr::StringToInt() throws; otherwise returns the parsed integer.
static int s_GetCollectStatistics(void)
{
    const char* env = getenv("GENBANK_ID1_STATS");
    if ( !env || !*env ) {
        return 0;
    }
    try {
        return NStr::StringToInt(env);
    }
    catch ( ... ) {
        // Deliberate best-effort: any conversion failure means "no stats".
        return 0;
    }
}
#endif


// Return the statistics level.
// With ID1_COLLECT_STATS defined the value comes from
// s_GetCollectStatistics() and is computed once (function-local static);
// otherwise the result is always 0.
int CId1Reader::CollectStatistics(void)
{
#ifdef ID1_COLLECT_STATS
    static int ret = s_GetCollectStatistics();
    return ret;
#else
    return 0;
#endif
}


// Construct the reader and set up its connection pool.
// The requested number of connections is overridden: the pool is always
// sized to one. If SetParallelLevel() throws, the pool is reset to zero
// connections and the exception is rethrown.
CId1Reader::CId1Reader(TConn noConn)
    : m_NoMoreConnections(false)
{
    noConn=1; // limit number of simultaneous connections to one
#if !defined(NCBI_THREADS)
    noConn=1;
#endif
    try {
        SetParallelLevel(noConn);
    }
    catch ( ... ) {
        SetParallelLevel(0);
        throw;
    }
}


// Release all pooled connections and, when the statistics level is
// non-zero, print the accumulated statistics.
CId1Reader::~CId1Reader()
{
    SetParallelLevel(0);
#ifdef ID1_COLLECT_STATS
    if ( CollectStatistics() ) {
        PrintStatistics();
    }
#endif
}


// Log one summary line: "<type> <count> <what> in <time> s <avg> ms/one".
// Nothing is logged when count is zero (this also avoids dividing by it).
// Compiled to a no-op when ID1_COLLECT_STATS is not defined.
void CId1Reader::PrintStat(const char* type,
                           size_t count,
                           const char* what,
                           double time)
{
#ifdef ID1_COLLECT_STATS
    if ( !count ) {
        return;
    }
    LOG_POST(type <<' '<<count<<' '<<what<<" in "<<
             setiosflags(ios::fixed)<<
             setprecision(3)<<
             (time)<<" s "<<
             (time*1000/count)<<" ms/one");
#endif
}


// Log one blob summary line: blob count, size in kB, elapsed seconds and
// throughput in kB/s. Nothing is logged when count is zero.
// Compiled to a no-op when ID1_COLLECT_STATS is not defined.
void CId1Reader::PrintBlobStat(const char* type,
                               size_t count,
                               double bytes,
                               double time)
{
#ifdef ID1_COLLECT_STATS
    if ( !count ) {
        return;
    }
    LOG_POST(type<<' '<<count<<" blobs "<<
             setiosflags(ios::fixed)<<
             setprecision(2)<<
             (bytes/1024)<<" kB in "<<
             setprecision(3)<<
             (time)<<" s "<<
             setprecision(2)<<
             (bytes/time/1024)<<" kB/s");
#endif
}


// Per-request timing log keyed by a name.
// Only active when the statistics level is greater than 1.
void CId1Reader::LogStat(const char* type, const string& name, double time)
{
#ifdef ID1_COLLECT_STATS
    if ( CollectStatistics() <= 1 ) {
        return;
    }
    LOG_POST(type<<' '<<name<<" in "<<
             setiosflags(ios::fixed)<<
             setprecision(3)<<
             (time*1000)<<" ms");
#endif
}


// Per-request timing log keyed by a name and a subkey.
// Only active when the statistics level is greater than 1.
void CId1Reader::LogStat(const char* type,
                         const string& name,
                         const string& subkey,
                         double time)
{
#ifdef ID1_COLLECT_STATS
    if ( CollectStatistics() <= 1 ) {
        return;
    }
    LOG_POST(type<<' '<<name<<" ("<<subkey<<") in "<<
             setiosflags(ios::fixed)<<
             setprecision(3)<<
             (time*1000)<<" ms");
#endif
}


// Per-request timing log keyed by a Seq-id (printed via AsFastaString()).
// Only active when the statistics level is greater than 1.
void CId1Reader::LogStat(const char* type, const CSeq_id& id, double time)
{
#ifdef ID1_COLLECT_STATS
    if ( CollectStatistics() <= 1 ) {
        return;
    }
    LOG_POST(type<<' '<<id.AsFastaString()<<" in "<<
             setiosflags(ios::fixed)<<
             setprecision(3)<<
             (time*1000)<<" ms");
#endif
}


// Per-request timing log keyed by the (sat, ent) pair of an ID1 request.
// Only active when the statistics level is greater than 1.
void CId1Reader::LogStat(const char* type,
                         const CID1server_maxcomplex& maxplex,
                         double time)
{
#ifdef ID1_COLLECT_STATS
    if ( CollectStatistics() <= 1 ) {
        return;
    }
    LOG_POST(type<<" TSE("<<maxplex.GetSat()<<','<<maxplex.GetEnt()<<") in "<<
             setiosflags(ios::fixed)<<
             setprecision(3)<<
             (time*1000)<<" ms");
#endif
}


// Per-request timing log keyed by a GI number.
// Only active when the statistics level is greater than 1.
void CId1Reader::LogStat(const char* type, int gi, double time)
{
#ifdef ID1_COLLECT_STATS
    if ( CollectStatistics() <= 1 ) {
        return;
    }
    LOG_POST(type<<' '<<gi<<" in "<<
             setiosflags(ios::fixed)<<
             setprecision(3)<<
             (time*1000)<<" ms");
#endif
}


// Per-blob log line: blob identity (seqref.printTSE()), size in kB,
// elapsed milliseconds and throughput in kB/s.
// Only active when the statistics level is greater than 1.
void CId1Reader::LogBlobStat(const char* type,
                             const CSeqref& seqref,
                             double bytes,
                             double time)
{
#ifdef ID1_COLLECT_STATS
    if ( CollectStatistics() <= 1 ) {
        return;
    }
    LOG_POST(type<<' '<<seqref.printTSE()<<' '<<
             setiosflags(ios::fixed)<<
             setprecision(2)<<
             (bytes/1024)<<" kB in "<<
             setprecision(3)<<
             (time*1000)<<" ms "<<
             setprecision(2)<<
             (bytes/1024/time)<<" kB/s");
#endif
}


// Print the accumulated totals: id/gi/version resolution counts and times,
// non-SNP and SNP blob loads, SNP decompression details (only when at least
// one SNP blob was counted) and the combined blob total.
// Compiled to a no-op when ID1_COLLECT_STATS is not defined.
void CId1Reader::PrintStatistics(void) const
{
#ifdef ID1_COLLECT_STATS
    PrintStat("ID1 resolution: resolved",
              resolve_id_count, "ids", resolve_id_time);
    PrintStat("ID1 resolution: resolved",
              resolve_gi_count, "gis", resolve_gi_time);
    PrintStat("ID1 resolution: resolved",
              resolve_ver_count, "vers", resolve_ver_time);
    PrintBlobStat("ID1 non-SNP: loaded",
                  main_blob_count, main_bytes, main_time);
    PrintBlobStat("ID1 SNP: loaded",
                  snp_blob_count, snp_compressed, snp_time);
    if ( snp_blob_count ) {
        LOG_POST("ID1 SNP decompression: "<<
                 setiosflags(ios::fixed)<<
                 setprecision(2)<<
                 (snp_compressed/1024)<<" kB -> "<<
                 (snp_uncompressed/1024)<<" kB, compession ratio: "<<
                 setprecision(1)<<
                 (snp_uncompressed/snp_compressed));
        // Parse time is whatever part of the SNP time was not spent reading.
        double snp_parse_time = snp_time - snp_total_read_time;
        LOG_POST("ID1 SNP times: decompression : "<<
                 setiosflags(ios::fixed)<<
                 setprecision(3)<<
                 (snp_decompression_time)<<" s, total read time: "<<
                 (snp_total_read_time)<<" s, parse time: "<<
                 (snp_parse_time)<<" s");
    }
    PrintBlobStat("ID1 total: loaded",
                  main_blob_count + snp_blob_count,
                  main_bytes + snp_compressed,
                  main_time + snp_time);
#endif
}


// Return the current number of slots in the connection pool.
CReader::TConn CId1Reader::GetParallelLevel(void) const
{
    return m_Pool.size();
}


// Resize the connection pool to `size` slots.
// Connections in slots that are being dropped are deleted first. After the
// resize at most the first slot (index 0) is opened eagerly; any other slot
// is opened on demand by x_GetConnection().
void CId1Reader::SetParallelLevel(TConn size)
{
    size_t oldSize = m_Pool.size();
    for (size_t i = size; i < oldSize; ++i) {
        delete m_Pool[i];
        m_Pool[i] = 0;
    }
    m_Pool.resize(size);
    // min(1u, size): only slot 0 is ever pre-connected here.
    for (size_t i = oldSize; i < min(1u, size); ++i) {
        m_Pool[i] = x_NewConnection();
    }
}


// Return the connection stored in slot (conn modulo pool size), opening it
// first if the slot is empty.
// Throws CLoaderException(eNoConnection) when the pool has no slots at all
// or when x_NewConnection() returns null.
CConn_ServiceStream* CId1Reader::x_GetConnection(TConn conn)
{
    if ( m_Pool.size() == 0 ) {
        // Guard: "conn % 0" below would be undefined behaviour.
        NCBI_THROW(CLoaderException, eNoConnection,
                   "connection pool is empty");
    }
    conn = conn % m_Pool.size();
    CConn_ServiceStream* ret = m_Pool[conn];
    if ( !ret ) {
        ret = x_NewConnection();
        if ( !ret ) {
            NCBI_THROW(CLoaderException, eNoConnection,
                       "too many connections failed: probably server is dead");
        }
        m_Pool[conn] = ret;
    }
    return ret;
}


// Drop the connection stored in slot (conn modulo pool size); the slot is
// left null so that the next x_GetConnection() for it opens a new one.
// Does nothing when the pool has no slots.
void CId1Reader::Reconnect(TConn conn)
{
    _TRACE("Reconnect(" << conn << ")");
    if ( m_Pool.size() == 0 ) {
        // Guard: "conn % 0" below would be undefined behaviour.
        return;
    }
    conn = conn % m_Pool.size();
    delete m_Pool[conn];
    m_Pool[conn] = 0;
}


// NOTE(review): the definition below runs past the end of this chunk of the
// file. Its visible tokens are kept unchanged (whitespace only); the
// remainder of the function is not shown here.
CConn_ServiceStream* CId1Reader::x_NewConnection(void)
{
    for ( int i = 0; !m_NoMoreConnections && i < 3; ++i ) {
        try {
            _TRACE("CId1Reader(" << this << ")->x_NewConnection()");
            string id1_svc;
            {{
                CNcbiApplication* app = CNcbiApplication::Instance();
                static const char* env_var = "NCBI_SERVICE_NAME_ID1";
                if (app) {
                    id1_svc = app->GetEnvironment().Get(env_var);
                } else {
                    char* s = ::getenv(env_var);
                    if (s) {
                        id1_svc = s;
                    }
                }
            }}
            if ( id1_svc.empty() ) {
                id1_svc = "ID1";
            }
            STimeout tmout;
            tmout.sec = 20;
            tmout.usec = 0;
            auto_ptr<CConn_ServiceStream> stream
                (new CConn_ServiceStream(id1_svc, fSERV_Any, 0, 0, &tmout));
            if ( !stream->bad() ) {
                return stream.release();
            }
            ERR_POST("CId1Reader::x_NewConnection: cannot connect.");
        }
        catch ( CException& exc ) {
            ERR_POST("CId1Reader::x_NewConnection: cannot connect: " <<
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?