reader_id1_cache.cpp

来自「ncbi源码」· C++ 代码 · 共 1,511 行 · 第 1/3 页

CPP
1,511
字号
/*
 * ===========================================================================
 * PRODUCTION $Log: reader_id1_cache.cpp,v $
 * PRODUCTION Revision 1000.2  2004/06/01 19:41:59  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.25
 * PRODUCTION
 * ===========================================================================
 */

/*  $Id: reader_id1_cache.cpp,v 1000.2 2004/06/01 19:41:59 gouriano Exp $
 * ===========================================================================
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 *  Author:  Eugene Vasilchenko, Anatoliy Kuznetsov
 *
 *  File Description: Cached extension of data reader from ID1
 *
 */

#include <ncbi_pch.hpp>
#include <objtools/data_loaders/genbank/readers/id1/reader_id1_cache.hpp>
#include <objtools/data_loaders/genbank/reader_snp.hpp>
#include <objtools/data_loaders/genbank/split_parser.hpp>

#include <corelib/ncbitime.hpp>

#include <util/cache/blob_cache.hpp>
#include <util/cache/int_cache.hpp>
#include <util/cache/icache.hpp>

#include <util/rwstream.hpp>
#include <util/bytesrc.hpp>

#include <serial/objistr.hpp>
#include <serial/objistrasnb.hpp>
#include <serial/objostrasnb.hpp>

#include <objmgr/objmgr_exception.hpp>
#include <objmgr/impl/snp_annot_info.hpp>
#include <objmgr/impl/tse_chunk_info.hpp>

#include <util/compress/reader_zlib.hpp>

#include <connect/ncbi_conn_stream.hpp>

#include <objects/seqset/Seq_entry.hpp>
#include <objects/seqsplit/ID2S_Split_Info.hpp>
#include <objects/seqsplit/ID2S_Chunk_Info.hpp>
#include <objects/seqsplit/ID2S_Chunk.hpp>
#include <objects/seqsplit/ID2S_Chunk_Id.hpp>
#include <objects/id1/id1__.hpp>
#include <objects/id2/ID2_Reply_Data.hpp>

#include <serial/serial.hpp>

#include <stdio.h>


BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)


/// Utility function to skip part of the input byte source
void Id1ReaderSkipBytes(CByteSourceReader& reader, size_t to_skip);


// File-local statistics accumulators.  Each group holds an event count and
// the accumulated time (plus a byte total for the blob groups); they are
// reported by CCachedId1Reader::PrintStatistics().
static size_t resolve_id_count = 0;
static double resolve_id_time = 0;

static size_t resolve_gi_count = 0;
static double resolve_gi_time = 0;

static size_t resolve_ver_count = 0;
static double resolve_ver_time = 0;

static size_t main_blob_count = 0;
static double main_bytes = 0;
static double main_time = 0;

static size_t chunk_blob_count = 0;
static double chunk_bytes = 0;
static double chunk_time = 0;

static size_t snp_load_count = 0;
static double snp_load_bytes = 0;
static double snp_load_time = 0;

static size_t snp_store_count = 0;
static double snp_store_bytes = 0;
static double snp_store_time = 0;


/// Construct a reader backed by the deprecated IBLOB_Cache / IIntCache
/// interfaces.
///
/// All four cache pointers start out null; the setters then install the
/// supplied caches (and post a deprecation warning for non-null ones).
///
/// @param noConn      forwarded unchanged to the CId1Reader base class
/// @param blob_cache  legacy blob cache, may be null
/// @param id_cache    legacy id cache, may be null
CCachedId1Reader::CCachedId1Reader(TConn noConn,
                                   IBLOB_Cache* blob_cache,
                                   IIntCache* id_cache)
    : CId1Reader(noConn),
      m_BlobCache(0), m_IdCache(0),
      m_OldBlobCache(0), m_OldIdCache(0)
{
    SetBlobCache(blob_cache);
    SetIdCache(id_cache);
}


/// Construct a reader backed by ICache instances.
///
/// @param noConn      forwarded unchanged to the CId1Reader base class
/// @param blob_cache  blob cache, may be null
/// @param id_cache    id cache, may be null
CCachedId1Reader::CCachedId1Reader(TConn noConn,
                                   ICache* blob_cache,
                                   ICache* id_cache)
    : CId1Reader(noConn),
      m_BlobCache(0), m_IdCache(0),
      m_OldBlobCache(0), m_OldIdCache(0)
{
    SetBlobCache(blob_cache);
    SetIdCache(id_cache);
}


/// Print the accumulated cache statistics if statistics collection is on.
CCachedId1Reader::~CCachedId1Reader()
{
    if ( CollectStatistics() ) {
        PrintStatistics();
    }
}


/// Report every file-local statistics accumulator through PrintStat() /
/// PrintBlobStat().
void CCachedId1Reader::PrintStatistics(void) const
{
    PrintStat("Cache resolution: resolved",
              resolve_id_count, "ids", resolve_id_time);
    PrintStat("Cache resolution: resolved",
              resolve_gi_count, "gis", resolve_gi_time);
    PrintStat("Cache resolution: resolved",
              resolve_ver_count, "blob vers", resolve_ver_time);
    PrintBlobStat("Cache main: loaded",
                  main_blob_count, main_bytes, main_time);
    PrintBlobStat("Cache chunk: loaded",
                  chunk_blob_count, chunk_bytes, chunk_time);
    PrintBlobStat("Cache SNP: loaded",
                  snp_load_count, snp_load_bytes, snp_load_time);
    PrintBlobStat("Cache SNP: stored",
                  snp_store_count, snp_store_bytes, snp_store_time);
}


/// Install an ICache blob cache and drop any legacy IBLOB_Cache.
void CCachedId1Reader::SetBlobCache(ICache* blob_cache)
{
    m_OldBlobCache = 0;
    m_BlobCache = blob_cache;
}


/// Install an ICache id cache and drop any legacy IIntCache.
void CCachedId1Reader::SetIdCache(ICache* id_cache)
{
    m_OldIdCache = 0;
    m_IdCache = id_cache;
}


/// Install a legacy IBLOB_Cache and drop any ICache blob cache.
///
/// A deprecation warning is posted when a non-null cache different from the
/// currently installed legacy cache is supplied.
void CCachedId1Reader::SetBlobCache(IBLOB_Cache* blob_cache)
{
    m_BlobCache = 0;
    if ( blob_cache && blob_cache != m_OldBlobCache ) {
        ERR_POST(Warning << "CCachedId1Reader: "
                 "IBLOB_Cache is deprecated, use ICache instead");
    }
    m_OldBlobCache = blob_cache;
}


/// Install a legacy IIntCache and drop any ICache id cache.
///
/// A deprecation warning is posted when a non-null cache different from the
/// currently installed legacy cache is supplied.
void CCachedId1Reader::SetIdCache(IIntCache* id_cache)
{
    m_IdCache = 0;
    if ( id_cache && id_cache != m_OldIdCache ) {
        ERR_POST(Warning << "CCachedId1Reader: "
                 "IIntCache is deprecated, use ICache instead");
    }
    m_OldIdCache = id_cache;
}


/// Build the cache key of a blob: "<sat>-<sat_key>" in decimal.
string CCachedId1Reader::GetBlobKey(const CSeqref& seqref) const
{
    // Composed with NStr instead of sprintf() into a fixed-size buffer;
    // the resulting text is the same as the former "%i-%i" format.
    return NStr::IntToString(seqref.GetSat()) + "-" +
        NStr::IntToString(seqref.GetSatKey());
}


/// Build the id-cache key of a GI: its decimal representation.
string CCachedId1Reader::GetIdKey(int gi) const
{
    return NStr::IntToString(gi);
}


/// Build the id-cache key of a Seq-id: the GI key for GI ids, otherwise the
/// FASTA string of the id.
string CCachedId1Reader::GetIdKey(const CSeq_id& id) const
{
    return id.IsGi()? GetIdKey(id.GetGi()): id.AsFastaString();
}


/// Subkey under which the list of seqrefs of an id is stored.
const char* CCachedId1Reader::GetSeqrefsSubkey(void) const
{
    return "srs";
}


/// Subkey "gi".
const char* CCachedId1Reader::GetGiSubkey(void) const
{
    return "gi";
}


/// Subkey "ver".
const char* CCachedId1Reader::GetBlobVersionSubkey(void) const
{
    return "ver";
}


/// Subkey "Seq-entry".
const char* CCachedId1Reader::GetSeqEntrySubkey(void) const
{
    return "Seq-entry";
}


/// Subkey "SNP table".
const char* CCachedId1Reader::GetSNPTableSubkey(void) const
{
    return "SNP table";
}


/// Subkey "Skeleton".
const char* CCachedId1Reader::GetSkeletonSubkey(void) const
{
    return "Skeleton";
}


/// Subkey "ID2S-Split-Info".
const char* CCachedId1Reader::GetSplitInfoSubkey(void) const
{
    return "ID2S-Split-Info";
}


/// Subkey of a split chunk: "ID2S-Chunk <chunk_id>".
string CCachedId1Reader::GetChunkSubkey(int chunk_id) const
{
    return "ID2S-Chunk "+NStr::IntToString(chunk_id);
}


/// Remove the id-cache records that belong to an id and its seqrefs.
///
/// With an ICache id cache the record keyed by GetIdKey(id) and the records
/// keyed by GetBlobKey() of every seqref are removed.  With a legacy
/// IIntCache the (gi, 0) and (sat_key, sat) records of every seqref are
/// removed.  Does nothing when no id cache is installed.
void CCachedId1Reader::PurgeSeqrefs(const TSeqrefs& srs, const CSeq_id& id)
{
    if ( m_IdCache ) {
        m_IdCache->Remove(GetIdKey(id));
        ITERATE ( TSeqrefs, it, srs ) {
            const CSeqref& sr = **it;
            m_IdCache->Remove(GetBlobKey(sr));
        }
    }
    else if ( m_OldIdCache ) {
        ITERATE ( TSeqrefs, it, srs ) {
            const CSeqref& sr = **it;
            m_OldIdCache->Remove(sr.GetGi(), 0);
            m_OldIdCache->Remove(sr.GetSatKey(), sr.GetSat());
        }
    }
}


/// Read an array of ints from the ICache id cache.
///
/// m_IdCache is dereferenced without a null check, so callers must make
/// sure it is installed.
///
/// @param key     record key
/// @param subkey  record subkey
/// @param ints    receives the stored values; it is resized from the record
///                size before the read, so its contents are unspecified
///                when false is returned
/// @return true if a non-empty record whose size is a multiple of
///         sizeof(int) was read successfully
bool CCachedId1Reader::x_GetIdCache(const string& key,
                                    const string& subkey,
                                    vector<int>& ints)
{
    CStopWatch sw;
    if ( CollectStatistics() ) {
        sw.Start();
    }
    size_t size = m_IdCache->GetSize(key, 0, subkey);
    ints.resize(size / sizeof(int));
    // The size checks come first so that &ints[0] is only evaluated for a
    // non-empty vector.
    if ( size == 0 || size % sizeof(int) != 0 ||
         !m_IdCache->Read(key, 0, subkey, &ints[0], size) ) {
        if ( CollectStatistics() ) {
            double time = sw.Elapsed();
            LogStat("CId1Cache: failed to read id cache record for id",
                    key, subkey, time);
            resolve_id_count++;
            resolve_id_time += time;
        }
        return false;
    }
    if ( CollectStatistics() ) {
        double time = sw.Elapsed();
        LogStat("CId1Cache: resolved id", key, subkey, time);
        resolve_id_count++;
        resolve_id_time += time;
    }
    return true;
}


/// Read a single int from the ICache id cache.
///
/// m_IdCache is dereferenced without a null check, so callers must make
/// sure it is installed.
///
/// @param key     record key
/// @param subkey  record subkey
/// @param value   receives the stored value on success
/// @return true if the record is exactly sizeof(int) bytes long and was
///         read successfully
bool CCachedId1Reader::x_GetIdCache(const string& key,
                                    const string& subkey,
                                    int& value)
{
    CStopWatch sw;
    if ( CollectStatistics() ) {
        sw.Start();
    }
    size_t size = m_IdCache->GetSize(key, 0, subkey);
    if ( size != sizeof(int) ||
         !m_IdCache->Read(key, 0, subkey, &value, size) ) {
        if ( CollectStatistics() ) {
            double time = sw.Elapsed();
            LogStat("CId1Cache: failed to read id cache record for id",
                    key, subkey, time);
            resolve_id_count++;
            resolve_id_time += time;
        }
        return false;
    }
    if ( CollectStatistics() ) {
        double time = sw.Elapsed();
        LogStat("CId1Cache: resolved id", key, subkey, time);
        resolve_id_count++;
        resolve_id_time += time;
    }
    return true;
}


/// Store an array of ints in the ICache id cache.
///
/// m_IdCache is dereferenced without a null check, so callers must make
/// sure it is installed.
///
/// @param key     record key
/// @param subkey  record subkey
/// @param ints    values to store; may be empty, in which case a
///                zero-length record is stored
void CCachedId1Reader::x_StoreIdCache(const string& key,
                                      const string& subkey,
                                      const vector<int>& ints)
{
    CStopWatch sw;
    if ( CollectStatistics() ) {
        sw.Start();
    }

    // &ints[0] is undefined for an empty vector, so hand the cache the
    // address of a dummy int together with a zero byte count instead.
    const int dummy = 0;
    const int* data = ints.empty()? &dummy: &ints[0];
    m_IdCache->Store(key, 0, subkey, data, ints.size()*sizeof(int));

    if ( CollectStatistics() ) {
        double time = sw.Elapsed();
        LogStat("CId1Cache: stored id", key, subkey, time);
        resolve_id_count++;
        resolve_id_time += time;
    }
}


/// Store a single int in the ICache id cache.
///
/// m_IdCache is dereferenced without a null check, so callers must make
/// sure it is installed.
///
/// @param key     record key
/// @param subkey  record subkey
/// @param value   value to store
void CCachedId1Reader::x_StoreIdCache(const string& key,
                                      const string& subkey,
                                      const int& value)
{
    CStopWatch sw;
    if ( CollectStatistics() ) {
        sw.Start();
    }

    m_IdCache->Store(key, 0, subkey, &value, sizeof(value));

    if ( CollectStatistics() ) {
        double time = sw.Elapsed();
        LogStat("CId1Cache: stored id", key, subkey, time);
        resolve_id_count++;
        resolve_id_time += time;
    }
}


/// Load the seqrefs stored under a key in the ICache id cache.
///
/// The record is a flat int array of (gi, sat, sat_key, version, flags)
/// groups, as written by StoreSeqrefs(const string&, const TSeqrefs&).
///
/// @param key  id-cache key
/// @param srs  decoded seqrefs are appended to this container
/// @return false if the record is missing or unreadable, if its length is
///         not a multiple of five ints, or if it holds more than 50 ints
bool CCachedId1Reader::GetSeqrefs(const string& key, TSeqrefs& srs)
{
    vector<int> data;
    if ( !x_GetIdCache(key, GetSeqrefsSubkey(), data) ) {
        return false;
    }
    // NOTE(review): the limit of 50 ints (ten seqrefs) looks like a sanity
    // bound on the record - confirm the intended maximum.
    if ( data.size() % 5 != 0 || data.size() > 50 ) {
        return false;
    }

    for ( size_t pos = 0; pos + 5 <= data.size(); pos += 5 ) {
        int gi      = data[pos];
        int sat     = data[pos+1];
        int satkey  = data[pos+2];
        int version = data[pos+3];
        int flags   = data[pos+4];
        CRef<CSeqref> sr(new CSeqref(gi, sat, satkey));
        sr->SetVersion(version);
        sr->SetFlags(flags);
        srs.push_back(sr);
    }
    return true;
}


/// Store seqrefs under a key in the ICache id cache as a flat int array of
/// (gi, sat, sat_key, version, flags) groups.
void CCachedId1Reader::StoreSeqrefs(const string& key, const TSeqrefs& srs)
{
    vector<int> data;
    ITERATE ( TSeqrefs, it, srs ) {
        const CSeqref& sr = **it;
        data.push_back(sr.GetGi());
        data.push_back(sr.GetSat());
        data.push_back(sr.GetSatKey());
        data.push_back(sr.GetVersion());
        data.push_back(sr.GetFlags());
    }
    x_StoreIdCache(key, GetSeqrefsSubkey(), data);
}


/// Load the seqrefs of a GI from whichever id cache is installed.
///
/// With an ICache the call is forwarded to the key-based overload.  With a
/// legacy IIntCache the (gi, 0) record is decoded as
/// (sat, sat_key, version, flags) groups.
///
/// @param gi   GI to resolve
/// @param srs  decoded seqrefs are appended to this container
/// @return true if the seqrefs were found in a cache; false if they were
///         not found or no id cache is installed
bool CCachedId1Reader::GetSeqrefs(int gi, TSeqrefs& srs)
{
    if ( m_IdCache ) {
        return GetSeqrefs(GetIdKey(gi), srs);
    }
    else if ( m_OldIdCache) {
        CStopWatch sw;
        if ( CollectStatistics() ) {
            sw.Start();
        }

        vector<int> data;
        if ( !m_OldIdCache->Read(gi, 0, data) ) {
            if ( CollectStatistics() ) {
                double time = sw.Elapsed();
                LogStat("CId1Cache: failed to resolve gi", gi, time);
                resolve_gi_count++;
                resolve_gi_time += time;
            }
            return false;
        }

        _ASSERT(data.size() == 4 || data.size() == 8);

        // The loop bound tolerates a trailing partial group; the assertion
        // above is the only place that insists on one or two full groups.
        for ( size_t pos = 0; pos + 4 <= data.size(); pos += 4 ) {
            int sat = data[pos];
            int satkey = data[pos+1];
            int version = data[pos+2];
            int flags = data[pos+3];
            CRef<CSeqref> sr(new CSeqref(gi, sat, satkey));
            sr->SetVersion(version);
            sr->SetFlags(flags);
            srs.push_back(sr);
        }

        if ( CollectStatistics() ) {
            double time = sw.Elapsed();
            LogStat("CId1Cache: resolved gi", gi, time);
            resolve_gi_count++;
            resolve_gi_time += time;
        }

        return true;
    }
    else {
        return false;
    }
}


// NOTE(review): the definition below is cut off at the end of this listing
// (page 1 of 3); the visible statements are reproduced unchanged.
void CCachedId1Reader::StoreSeqrefs(int gi, const TSeqrefs& srs)
{
    if ( m_IdCache ) {
        StoreSeqrefs(GetIdKey(gi), srs);
    }
    else if ( m_OldIdCache ) {
        CStopWatch sw;
        if ( CollectStatistics() ) {
            sw.Start();
        }

        vector<int> data;

        ITERATE ( TSeqrefs, it, srs ) {
            const CSeqref& sr = **it;
            data.push_back(sr.GetSat());
            data.push_back(sr.GetSatKey());
            data.push_back(sr.GetVersion());
            data.push_back(sr.GetFlags());
        }

        _ASSERT(data.size() == 4 || data.size() == 8);

        m_OldIdCache->Store(gi, 0, data);

        if ( CollectStatistics() ) {
            double time = sw.Elapsed();
            LogStat("CId1Cache: saved gi", gi, time);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?