reader_id1_cache.cpp
来自「ncbi源码」· C++ 代码 · 共 1,511 行 · 第 1/3 页
CPP
1,511 行
/*
 * ===========================================================================
 * PRODUCTION $Log: reader_id1_cache.cpp,v $
 * PRODUCTION Revision 1000.2 2004/06/01 19:41:59 gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.25
 * PRODUCTION
 * ===========================================================================
 */

/* $Id: reader_id1_cache.cpp,v 1000.2 2004/06/01 19:41:59 gouriano Exp $
 * ===========================================================================
 * PUBLIC DOMAIN NOTICE
 * National Center for Biotechnology Information
 *
 * This software/database is a "United States Government Work" under the
 * terms of the United States Copyright Act. It was written as part of
 * the author's official duties as a United States Government employee and
 * thus cannot be copyrighted. This software/database is freely available
 * to the public for use. The National Library of Medicine and the U.S.
 * Government have not placed any restriction on its use or reproduction.
 *
 * Although all reasonable efforts have been taken to ensure the accuracy
 * and reliability of the software and data, the NLM and the U.S.
 * Government do not and cannot warrant the performance or results that
 * may be obtained by using this software or data. The NLM and the U.S.
 * Government disclaim all warranties, express or implied, including
 * warranties of performance, merchantability or fitness for any particular
 * purpose.
 *
 * Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Author: Eugene Vasilchenko, Anatoliy Kuznetsov
 *
 * File Description: Cached extension of data reader from ID1
 *
 */

#include <ncbi_pch.hpp>
#include <objtools/data_loaders/genbank/readers/id1/reader_id1_cache.hpp>
#include <objtools/data_loaders/genbank/reader_snp.hpp>
#include <objtools/data_loaders/genbank/split_parser.hpp>

#include <corelib/ncbitime.hpp>

#include <util/cache/blob_cache.hpp>
#include <util/cache/int_cache.hpp>
#include <util/cache/icache.hpp>

#include <util/rwstream.hpp>
#include <util/bytesrc.hpp>

#include <serial/objistr.hpp>
#include <serial/objistrasnb.hpp>
#include <serial/objostrasnb.hpp>

#include <objmgr/objmgr_exception.hpp>
#include <objmgr/impl/snp_annot_info.hpp>
#include <objmgr/impl/tse_chunk_info.hpp>

#include <util/compress/reader_zlib.hpp>

#include <connect/ncbi_conn_stream.hpp>

#include <objects/seqset/Seq_entry.hpp>
#include <objects/seqsplit/ID2S_Split_Info.hpp>
#include <objects/seqsplit/ID2S_Chunk_Info.hpp>
#include <objects/seqsplit/ID2S_Chunk.hpp>
#include <objects/seqsplit/ID2S_Chunk_Id.hpp>

#include <objects/id1/id1__.hpp>
#include <objects/id2/ID2_Reply_Data.hpp>

#include <serial/serial.hpp>

#include <stdio.h>

BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)

/// Utility function to skip part of the input byte source
void Id1ReaderSkipBytes(CByteSourceReader& reader, size_t to_skip);


// File-wide statistics accumulators, reported by
// CCachedId1Reader::PrintStatistics().  Each group pairs an operation
// counter with the accumulated elapsed time (and, for blobs, byte volume).
// They are plain statics with no synchronization.
static size_t resolve_id_count = 0;
static double resolve_id_time = 0;

static size_t resolve_gi_count = 0;
static double resolve_gi_time = 0;

static size_t resolve_ver_count = 0;
static double resolve_ver_time = 0;

static size_t main_blob_count = 0;
static double main_bytes = 0;
static double main_time = 0;

static size_t chunk_blob_count = 0;
static double chunk_bytes = 0;
static double chunk_time = 0;

static size_t snp_load_count = 0;
static double snp_load_bytes = 0;
static double snp_load_time = 0;

static size_t snp_store_count = 0;
static double snp_store_bytes = 0;
static double snp_store_time = 0;


/// Construct a reader on top of the deprecated IBLOB_Cache / IIntCache
/// interfaces.  All cache pointers start out null and are then installed
/// through the corresponding setters.
CCachedId1Reader::CCachedId1Reader(TConn noConn,
                                   IBLOB_Cache* blob_cache,
                                   IIntCache* id_cache)
    : CId1Reader(noConn),
      m_BlobCache(0), m_IdCache(0),
      m_OldBlobCache(0), m_OldIdCache(0)
{
    SetBlobCache(blob_cache);
    SetIdCache(id_cache);
}


/// Construct a reader on top of the ICache interface.  All cache pointers
/// start out null and are then installed through the corresponding setters.
CCachedId1Reader::CCachedId1Reader(TConn noConn,
                                   ICache* blob_cache,
                                   ICache* id_cache)
    : CId1Reader(noConn),
      m_BlobCache(0), m_IdCache(0),
      m_OldBlobCache(0), m_OldIdCache(0)
{
    SetBlobCache(blob_cache);
    SetIdCache(id_cache);
}


/// Print the accumulated statistics if statistics collection is enabled.
CCachedId1Reader::~CCachedId1Reader()
{
    if ( CollectStatistics() ) {
        PrintStatistics();
    }
}


/// Report every statistics accumulator of this file through
/// PrintStat() / PrintBlobStat().
void CCachedId1Reader::PrintStatistics(void) const
{
    PrintStat("Cache resolution: resolved",
              resolve_id_count, "ids", resolve_id_time);
    PrintStat("Cache resolution: resolved",
              resolve_gi_count, "gis", resolve_gi_time);
    PrintStat("Cache resolution: resolved",
              resolve_ver_count, "blob vers", resolve_ver_time);
    PrintBlobStat("Cache main: loaded",
                  main_blob_count, main_bytes, main_time);
    PrintBlobStat("Cache chunk: loaded",
                  chunk_blob_count, chunk_bytes, chunk_time);
    PrintBlobStat("Cache SNP: loaded",
                  snp_load_count, snp_load_bytes, snp_load_time);
    PrintBlobStat("Cache SNP: stored",
                  snp_store_count, snp_store_bytes, snp_store_time);
}


/// Install an ICache blob cache; any deprecated blob cache is dropped.
void CCachedId1Reader::SetBlobCache(ICache* blob_cache)
{
    m_OldBlobCache = 0;
    m_BlobCache = blob_cache;
}


/// Install an ICache id cache; any deprecated id cache is dropped.
void CCachedId1Reader::SetIdCache(ICache* id_cache)
{
    m_OldIdCache = 0;
    m_IdCache = id_cache;
}


/// Install a deprecated IBLOB_Cache blob cache; any ICache blob cache is
/// dropped.  A warning is posted when a non-null cache different from the
/// currently installed deprecated one is supplied.
void CCachedId1Reader::SetBlobCache(IBLOB_Cache* blob_cache)
{
    m_BlobCache = 0;
    if ( blob_cache && blob_cache != m_OldBlobCache ) {
        ERR_POST(Warning << "CCachedId1Reader: "
                 "IBLOB_Cache is deprecated, use ICache instead");
    }
    m_OldBlobCache = blob_cache;
}


/// Install a deprecated IIntCache id cache; any ICache id cache is
/// dropped.  A warning is posted when a non-null cache different from the
/// currently installed deprecated one is supplied.
void CCachedId1Reader::SetIdCache(IIntCache* id_cache)
{
    m_IdCache = 0;
    if ( id_cache && id_cache != m_OldIdCache ) {
        ERR_POST(Warning << "CCachedId1Reader: "
                 "IIntCache is deprecated, use ICache instead");
    }
    m_OldIdCache = id_cache;
}


/// Build the cache key of a blob: "<sat>-<sat_key>" in decimal.
string CCachedId1Reader::GetBlobKey(const CSeqref& seqref) const
{
    int sat = seqref.GetSat();
    int sat_key = seqref.GetSatKey();
    // Same text as the former sprintf("%i-%i") into a fixed buffer, built
    // the same way as the other keys in this file.
    return NStr::IntToString(sat) + "-" + NStr::IntToString(sat_key);
}


/// Cache key of a GI: its decimal representation.
string CCachedId1Reader::GetIdKey(int gi) const
{
    return NStr::IntToString(gi);
}


/// Cache key of a Seq-id: the GI key for GI ids, the FASTA string otherwise.
string CCachedId1Reader::GetIdKey(const CSeq_id& id) const
{
    return id.IsGi()? GetIdKey(id.GetGi()): id.AsFastaString();
}


/// Subkey under which the list of seqrefs of an id is stored.
const char* CCachedId1Reader::GetSeqrefsSubkey(void) const
{
    return "srs";
}


/// Subkey "gi".
const char* CCachedId1Reader::GetGiSubkey(void) const
{
    return "gi";
}


/// Subkey "ver".
const char* CCachedId1Reader::GetBlobVersionSubkey(void) const
{
    return "ver";
}


/// Subkey "Seq-entry".
const char* CCachedId1Reader::GetSeqEntrySubkey(void) const
{
    return "Seq-entry";
}


/// Subkey "SNP table".
const char* CCachedId1Reader::GetSNPTableSubkey(void) const
{
    return "SNP table";
}


/// Subkey "Skeleton".
const char* CCachedId1Reader::GetSkeletonSubkey(void) const
{
    return "Skeleton";
}


/// Subkey "ID2S-Split-Info".
const char* CCachedId1Reader::GetSplitInfoSubkey(void) const
{
    return "ID2S-Split-Info";
}


/// Subkey of a split chunk: "ID2S-Chunk <chunk_id>".
string CCachedId1Reader::GetChunkSubkey(int chunk_id) const
{
    return "ID2S-Chunk "+NStr::IntToString(chunk_id);
}


/// Remove the id cache records related to the given seqrefs.
///
/// With an ICache id cache the record of `id` and the record keyed by each
/// seqref's blob key are removed.  With the deprecated IIntCache only the
/// per-seqref records (gi, 0) and (sat_key, sat) are removed; `id` is not
/// used in that branch.  Does nothing when no id cache is installed.
void CCachedId1Reader::PurgeSeqrefs(const TSeqrefs& srs, const CSeq_id& id)
{
    if ( m_IdCache ) {
        m_IdCache->Remove(GetIdKey(id));
        ITERATE ( TSeqrefs, it, srs ) {
            const CSeqref& sr = **it;
            m_IdCache->Remove(GetBlobKey(sr));
        }
    }
    else if ( m_OldIdCache ) {
        ITERATE ( TSeqrefs, it, srs ) {
            const CSeqref& sr = **it;
            m_OldIdCache->Remove(sr.GetGi(), 0);
            m_OldIdCache->Remove(sr.GetSatKey(), sr.GetSat());
        }
    }
}


/// Read an array of ints stored under (key, subkey) in the ICache id cache.
///
/// m_IdCache is dereferenced unconditionally, so callers must have checked
/// it.  A record is accepted only if it is non-empty and its size is a
/// whole number of ints.
///
/// @param ints  receives the stored values on success; left empty on failure
/// @return      true if a well-formed record was read
bool CCachedId1Reader::x_GetIdCache(const string& key,
                                    const string& subkey,
                                    vector<int>& ints)
{
    CStopWatch sw;
    if ( CollectStatistics() ) {
        sw.Start();
    }

    size_t size = m_IdCache->GetSize(key, 0, subkey);
    // Validate the record size before touching the output vector, so that
    // &ints[0] is only ever taken on a non-empty vector.
    bool ok = size != 0 && size % sizeof(int) == 0;
    if ( ok ) {
        ints.resize(size / sizeof(int));
        ok = m_IdCache->Read(key, 0, subkey, &ints[0], size);
    }
    if ( !ok ) {
        // Do not hand partially filled or stale data back to the caller.
        ints.clear();
    }

    if ( CollectStatistics() ) {
        double time = sw.Elapsed();
        LogStat(ok?
                "CId1Cache: resolved id":
                "CId1Cache: failed to read id cache record for id",
                key, subkey, time);
        // Failed lookups are counted as well.
        resolve_id_count++;
        resolve_id_time += time;
    }
    return ok;
}


/// Read a single int stored under (key, subkey) in the ICache id cache.
///
/// m_IdCache is dereferenced unconditionally, so callers must have checked
/// it.  The record must be exactly sizeof(int) bytes long.
///
/// @param value  receives the stored value on success
/// @return       true if the record was read
bool CCachedId1Reader::x_GetIdCache(const string& key,
                                    const string& subkey,
                                    int& value)
{
    CStopWatch sw;
    if ( CollectStatistics() ) {
        sw.Start();
    }

    size_t size = m_IdCache->GetSize(key, 0, subkey);
    bool ok = size == sizeof(int) &&
        m_IdCache->Read(key, 0, subkey, &value, size);

    if ( CollectStatistics() ) {
        double time = sw.Elapsed();
        LogStat(ok?
                "CId1Cache: resolved id":
                "CId1Cache: failed to read id cache record for id",
                key, subkey, time);
        // Failed lookups are counted as well.
        resolve_id_count++;
        resolve_id_time += time;
    }
    return ok;
}


/// Store an array of ints under (key, subkey) in the ICache id cache.
///
/// m_IdCache is dereferenced unconditionally, so callers must have checked
/// it.  An empty array is stored as a zero-length record, which
/// x_GetIdCache() reports as a failed lookup.
void CCachedId1Reader::x_StoreIdCache(const string& key,
                                      const string& subkey,
                                      const vector<int>& ints)
{
    CStopWatch sw;
    if ( CollectStatistics() ) {
        sw.Start();
    }

    // &ints[0] is undefined for an empty vector; pass a valid pointer
    // together with a zero size instead.
    const int no_data = 0;
    const int* data = ints.empty()? &no_data: &ints[0];
    m_IdCache->Store(key, 0, subkey, data, ints.size()*sizeof(int));

    if ( CollectStatistics() ) {
        double time = sw.Elapsed();
        LogStat("CId1Cache: stored id", key, subkey, time);
        resolve_id_count++;
        resolve_id_time += time;
    }
}


/// Store a single int under (key, subkey) in the ICache id cache.
///
/// m_IdCache is dereferenced unconditionally, so callers must have checked
/// it.
void CCachedId1Reader::x_StoreIdCache(const string& key,
                                      const string& subkey,
                                      const int& value)
{
    CStopWatch sw;
    if ( CollectStatistics() ) {
        sw.Start();
    }

    m_IdCache->Store(key, 0, subkey, &value, sizeof(value));

    if ( CollectStatistics() ) {
        double time = sw.Elapsed();
        LogStat("CId1Cache: stored id", key, subkey, time);
        resolve_id_count++;
        resolve_id_time += time;
    }
}


/// Load the seqrefs cached under `key` from the ICache id cache.
///
/// The record is a flat array of ints, five per seqref, in the order
/// written by StoreSeqrefs(): gi, sat, sat_key, version, flags.
/// Records that are not a whole number of seqrefs, or hold more than
/// 50 ints, are rejected.
///
/// @param srs  decoded seqrefs are appended; untouched on failure
/// @return     true if a well-formed record was found
bool CCachedId1Reader::GetSeqrefs(const string& key, TSeqrefs& srs)
{
    const size_t kIntsPerSeqref = 5;
    const size_t kMaxInts = 50;

    vector<int> data;
    if ( !x_GetIdCache(key, GetSeqrefsSubkey(), data) ) {
        return false;
    }
    if ( data.size() % kIntsPerSeqref != 0 || data.size() > kMaxInts ) {
        return false;
    }

    for ( size_t pos = 0; pos < data.size(); pos += kIntsPerSeqref ) {
        int gi = data[pos];
        int sat = data[pos+1];
        int satkey = data[pos+2];
        int version = data[pos+3];
        int flags = data[pos+4];
        CRef<CSeqref> sr(new CSeqref(gi, sat, satkey));
        sr->SetVersion(version);
        sr->SetFlags(flags);
        srs.push_back(sr);
    }
    return true;
}


/// Store the seqrefs under `key` in the ICache id cache as a flat array of
/// ints, five per seqref: gi, sat, sat_key, version, flags.
void CCachedId1Reader::StoreSeqrefs(const string& key, const TSeqrefs& srs)
{
    vector<int> data;
    data.reserve(srs.size()*5);
    ITERATE ( TSeqrefs, it, srs ) {
        const CSeqref& sr = **it;
        data.push_back(sr.GetGi());
        data.push_back(sr.GetSat());
        data.push_back(sr.GetSatKey());
        data.push_back(sr.GetVersion());
        data.push_back(sr.GetFlags());
    }
    x_StoreIdCache(key, GetSeqrefsSubkey(), data);
}


/// Load the seqrefs cached for a GI.
///
/// Uses the ICache id cache when installed; otherwise falls back to the
/// deprecated IIntCache, whose record holds four ints per seqref
/// (sat, sat_key, version, flags) with the GI implied by the key.
///
/// @param srs  decoded seqrefs are appended
/// @return     true if a record was found; false on a miss or when no id
///             cache is installed
bool CCachedId1Reader::GetSeqrefs(int gi, TSeqrefs& srs)
{
    if ( m_IdCache ) {
        return GetSeqrefs(GetIdKey(gi), srs);
    }
    else if ( m_OldIdCache) {
        CStopWatch sw;
        if ( CollectStatistics() ) {
            sw.Start();
        }

        vector<int> data;
        if ( !m_OldIdCache->Read(gi, 0, data) ) {
            if ( CollectStatistics() ) {
                double time = sw.Elapsed();
                LogStat("CId1Cache: failed to resolve gi", gi, time);
                resolve_gi_count++;
                resolve_gi_time += time;
            }
            return false;
        }

        // One or two seqrefs are expected per record.
        _ASSERT(data.size() == 4 || data.size() == 8);

        // The loop bound ignores any incomplete trailing group.
        for ( size_t pos = 0; pos + 4 <= data.size(); pos += 4 ) {
            int sat = data[pos];
            int satkey = data[pos+1];
            int version = data[pos+2];
            int flags = data[pos+3];
            CRef<CSeqref> sr(new CSeqref(gi, sat, satkey));
            sr->SetVersion(version);
            sr->SetFlags(flags);
            srs.push_back(sr);
        }

        if ( CollectStatistics() ) {
            double time = sw.Elapsed();
            LogStat("CId1Cache: resolved gi", gi, time);
            resolve_gi_count++;
            resolve_gi_time += time;
        }

        return true;
    }
    else {
        return false;
    }
}


// NOTE(review): the definition below continues past the end of this listing;
// it is reproduced unchanged up to the point where the listing stops.
void CCachedId1Reader::StoreSeqrefs(int gi, const TSeqrefs& srs)
{
    if ( m_IdCache ) {
        StoreSeqrefs(GetIdKey(gi), srs);
    }
    else if ( m_OldIdCache ) {
        CStopWatch sw;
        if ( CollectStatistics() ) {
            sw.Start();
        }

        vector<int> data;
        ITERATE ( TSeqrefs, it, srs ) {
            const CSeqref& sr = **it;
            data.push_back(sr.GetSat());
            data.push_back(sr.GetSatKey());
            data.push_back(sr.GetVersion());
            data.push_back(sr.GetFlags());
        }

        _ASSERT(data.size() == 4 || data.size() == 8);

        m_OldIdCache->Store(gi, 0, data);

        if ( CollectStatistics() ) {
            double time = sw.Elapsed();
            LogStat("CId1Cache: saved gi", gi, time);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?