split_cache.cpp

来自「ncbi源码」· C++ 代码 · 共 827 行 · 第 1/2 页

CPP
827
字号
/* * =========================================================================== * PRODUCTION $Log: split_cache.cpp,v $ * PRODUCTION Revision 1000.2  2004/06/01 19:42:02  gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.19 * PRODUCTION * =========================================================================== *//*  $Id: split_cache.cpp,v 1000.2 2004/06/01 19:42:02 gouriano Exp $* ===========================================================================**                            PUBLIC DOMAIN NOTICE*               National Center for Biotechnology Information**  This software/database is a "United States Government Work" under the*  terms of the United States Copyright Act.  It was written as part of*  the author's official duties as a United States Government employee and*  thus cannot be copyrighted.  This software/database is freely available*  to the public for use. The National Library of Medicine and the U.S.*  Government have not placed any restriction on its use or reproduction.**  Although all reasonable efforts have been taken to ensure the accuracy*  and reliability of the software and data, the NLM and the U.S.*  Government do not and cannot warrant the performance or results that*  may be obtained by using this software or data. The NLM and the U.S.*  Government disclaim all warranties, express or implied, including*  warranties of performance, merchantability or fitness for any particular*  purpose.**  Please cite the author in any work or product based on this material.** ===========================================================================** Author:  Eugene Vasilchenko** File Description:*   Application for splitting blobs withing ID1 cache** ===========================================================================*/#include <ncbi_pch.hpp>#include "split_cache.hpp"#include <corelib/ncbistd.hpp>#include <corelib/ncbiapp.hpp>#include <corelib/ncbienv.hpp>#include <corelib/ncbifile.hpp>#include <corelib/ncbitime.hpp>#include <corelib/ncbiargs.hpp>#include <corelib/ncbistre.hpp>#include <serial/objistr.hpp>#include <serial/objostr.hpp>#include <serial/serial.hpp>// Objects includes#include <objects/seq/Bioseq.hpp>#include <objects/seq/Seq_annot.hpp>#include <objects/seq/Seq_inst.hpp>#include <objects/seq/Seq_ext.hpp>#include <objects/seqloc/Seq_id.hpp>#include <objects/seqloc/Seq_loc.hpp>#include <objects/seqloc/Seq_interval.hpp>#include <objects/seqset/Seq_entry.hpp>#include <objects/seqset/Bioseq_set.hpp>#include <objects/seqfeat/seqfeat__.hpp>#include <objects/id2/ID2_Reply_Data.hpp>#include <objects/seqsplit/ID2S_Split_Info.hpp>#include <objects/seqsplit/ID2S_Chunk_Id.hpp>#include <objects/seqsplit/ID2S_Chunk.hpp>// Object manager includes#include <objmgr/object_manager.hpp>#include <objmgr/scope.hpp>#include <objmgr/seq_vector.hpp>#include <objmgr/seq_map.hpp>#include <objmgr/seq_map_ci.hpp>#include <objmgr/seq_descr_ci.hpp>#include <objmgr/feat_ci.hpp>#include <objmgr/graph_ci.hpp>#include <objmgr/align_ci.hpp>#include <objtools/data_loaders/genbank/gbloader.hpp>#include <objmgr/bioseq_ci.hpp>#include <objmgr/seq_annot_ci.hpp>#include <objmgr/impl/synonyms.hpp>#include <objmgr/impl/tse_info.hpp>#include <objmgr/impl/tse_chunk_info.hpp>#include <objmgr/impl/seq_annot_info.hpp>// cache#include <objtools/data_loaders/genbank/readers/id1/reader_id1_cache.hpp>#include <bdb/bdb_blobcache.hpp>#include <objmgr/split/blob_splitter.hpp>#include <objmgr/split/id2_compress.hpp>BEGIN_NCBI_SCOPEBEGIN_SCOPE(objects)static const int kDefaultCacheBlobAge = 5; // keep objects for 5 daysstatic const int kDefaultCacheIdAge   = 1; // keep id resolution for 1 dayclass CLog{public:    CLog(const CSplitCacheApp* app)        : m_App(app), m_Stream(0)        {        }    ~CLog(void)        {            End();        }    void End(void)        {            if ( m_Stream ) {                *m_Stream << NcbiEndl;                m_Stream = 0;            }        }    CNcbiOstream& Start(void)        {            if ( !m_Stream ) {                m_Stream = &m_App->Info();            }            return *m_Stream;        }        operator CNcbiOstream&(void)        {            return Start();        }    class CFlusher    {    public:        CFlusher(CNcbiOstream& out)            : m_Stream(out)            {            }        ~CFlusher(void)            {                m_Stream.flush();            }        template<typename T>        CFlusher& operator<<(const T& t)            {                m_Stream << t;                return *this;            }            private:        CNcbiOstream& m_Stream;    };    template<typename T>    CFlusher operator<<(const T& t)        {            Start() << t;            return CFlusher(*m_Stream);        }private:    const CSplitCacheApp* m_App;    CNcbiOstream* m_Stream;};CSplitCacheApp::CSplitCacheApp(void)    : m_DumpAsnText(false), m_DumpAsnBinary(false),      m_Resplit(false), m_Recurse(false),      m_RecursionLevel(0){}CSplitCacheApp::~CSplitCacheApp(void){}void CSplitCacheApp::Init(void){    // Prepare command line descriptions    //    // Create    auto_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);    // set of entries to process    arg_desc->AddOptionalKey("gi", "Gi",                             "GI of the Seq-Entry to process",                             CArgDescriptions::eInteger);    arg_desc->AddOptionalKey("gi_list", "GiList",                             "file with list of GIs to process",                             CArgDescriptions::eInputFile);    arg_desc->AddOptionalKey("id", "SeqId",                             "Seq-id of the Seq-Entry to process",                             CArgDescriptions::eString);    arg_desc->AddOptionalKey("id_list", "SeqIdList",                             "file with list of Seq-ids to process",                             CArgDescriptions::eInputFile);    arg_desc->AddFlag("all",                      "process all entries in cache");    arg_desc->AddFlag("recurse",                      "process all entries referenced by specified ones");    // cache parameters    arg_desc->AddDefaultKey("cache_dir", "CacheDir",                            "directory of GenBank cache",                            CArgDescriptions::eInputFile,                            ".genbank_cache");    // split parameters    arg_desc->AddDefaultKey        ("chunk_size", "ChunkSize",         "approximate size of chunks to create (in KB)",         CArgDescriptions::eInteger,         NStr::IntToString(SSplitterParams::kDefaultChunkSize/1024));    arg_desc->AddFlag("compress",                      "try to compress split data");    arg_desc->AddFlag("resplit",                      "resplit already splitted data");    // debug parameters    arg_desc->AddFlag("dump",                      "dump blobs in ASN.1 text format");    arg_desc->AddFlag("bdump",                      "dump blobs in ASN.1 binary format");    // Program description    string prog_description = "Example of the C++ object manager usage\n";    arg_desc->SetUsageContext(GetArguments().GetProgramBasename(),                              prog_description, false);    // Pass argument descriptions to the application    //    SetupArgDescriptions(arg_desc.release());}int CSplitCacheApp::Run(void){    SetDiagPostLevel(eDiag_Info);    SetupCache();        Process();    return 0;}void CSplitCacheApp::SetupCache(void){    const CArgs& args = GetArgs();    const CNcbiRegistry& reg = GetConfig();        string cache_dir;    {{ // set cache directory        if ( args["cache_dir"] ) {            cache_dir = args["cache_dir"].AsString();        }        else {            cache_dir = reg.GetString("LOCAL_CACHE", "Path",                                      cache_dir, CNcbiRegistry::eErrPost);        }        if ( cache_dir.empty() ) {            ERR_POST(Fatal << "empty cache directory name");        }    }}    {{ // create cache directory        LINE("cache directory is \"" << cache_dir << "\"");        {{            // make sure our cache directory exists first            CDir dir(cache_dir);            if ( !dir.Exists() ) {                dir.Create();            }        }}    }}    {{ // blob cache        CBDB_Cache* cache;        m_Cache.reset(cache = new CBDB_Cache());                int blob_age = reg.GetInt("LOCAL_CACHE", "Age", kDefaultCacheBlobAge,                                  CNcbiRegistry::eErrPost);        // Cache cleaning        // Objects age should be assigned in days, negative value        // means cleaning is disabled                    if ( blob_age <= 0 ) {            blob_age = kDefaultCacheBlobAge;        }                ICache::TTimeStampFlags flags =            ICache::fTimeStampOnRead |            ICache::fExpireLeastFrequentlyUsed |            ICache::fPurgeOnStartup;        cache->SetTimeStampPolicy(flags, blob_age*24*60*60);                cache->Open(cache_dir.c_str(), "blobs");        // purge old blobs        CTime time_stamp(CTime::eCurrent);        time_t age = time_stamp.GetTimeT();        age -= 60 * 60 * 24 * blob_age;        cache->Purge(age);    }}    {{ // set cache id age        CBDB_Cache* cache;        m_IdCache.reset(cache = new CBDB_Cache());                int id_age = reg.GetInt("LOCAL_CACHE", "IdAge", kDefaultCacheIdAge,                                CNcbiRegistry::eErrPost);                if ( id_age <= 0 ) {            id_age = kDefaultCacheIdAge;        }        ICache::TTimeStampFlags flags =            ICache::fTimeStampOnCreate|            ICache::fCheckExpirationAlways;        cache->SetTimeStampPolicy(flags, id_age*24*60*60);                cache->Open(cache_dir.c_str(), "ids");    }}    {{ // create loader        m_Reader = new CCachedId1Reader(1, &*m_Cache, &*m_IdCache);        m_Loader.Reset(new CGBDataLoader("GenBank", m_Reader));    }}    {{ // create object manager        m_ObjMgr.Reset(new CObjectManager);        m_ObjMgr->RegisterDataLoader(*m_Loader, CObjectManager::eDefault);    }}    {{ // Create scope        m_Scope.Reset(new CScope(*m_ObjMgr));        m_Scope->AddDefaults();    }}}CNcbiOstream& CSplitCacheApp::Info(void) const{    for ( size_t i = 0; i < m_RecursionLevel; ++i ) {        NcbiCout << "  ";    }    return NcbiCout;}class CSplitDataMaker{public:    CSplitDataMaker(const SSplitterParams& params, int data_type)        : m_Params(params), m_DataType(data_type)        {        }    template<class C>    void operator<<(const C& obj)        {            OpenDataStream() << obj;            CloseDataStream(m_DataType);        }    CObjectOStream& OpenDataStream(void)        {            m_OStream.reset();            m_MStream.reset(new CNcbiOstrstream);            m_OStream.reset(CObjectOStream::Open(eSerial_AsnBinary,                                                 *m_MStream));            return *m_OStream;        }    void CloseDataStream(int data_type)        {            m_Data.Reset();            m_Data.SetData_type(data_type);            m_Data.SetData_format(eSerial_AsnBinary);            m_Data.SetData_compression(m_Params.m_Compression);            m_OStream.reset();            size_t size = m_MStream->pcount();            const char* data = m_MStream->str();            m_MStream->freeze(false);            CId2Compressor::Compress(m_Params, m_Data.SetData(), data, size);            m_MStream.reset();        }    const CID2_Reply_Data& GetData(void) const        {            return m_Data;        }private:

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?