split_cache.cpp
来自「ncbi源码」· C++ 代码 · 共 827 行 · 第 1/2 页
CPP
827 行
/*
 * ===========================================================================
 * PRODUCTION $Log: split_cache.cpp,v $
 * PRODUCTION Revision 1000.2 2004/06/01 19:42:02 gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.19
 * PRODUCTION
 * ===========================================================================
 */

/* $Id: split_cache.cpp,v 1000.2 2004/06/01 19:42:02 gouriano Exp $
* ===========================================================================
*
* PUBLIC DOMAIN NOTICE
* National Center for Biotechnology Information
*
* This software/database is a "United States Government Work" under the
* terms of the United States Copyright Act. It was written as part of
* the author's official duties as a United States Government employee and
* thus cannot be copyrighted. This software/database is freely available
* to the public for use. The National Library of Medicine and the U.S.
* Government have not placed any restriction on its use or reproduction.
*
* Although all reasonable efforts have been taken to ensure the accuracy
* and reliability of the software and data, the NLM and the U.S.
* Government do not and cannot warrant the performance or results that
* may be obtained by using this software or data. The NLM and the U.S.
* Government disclaim all warranties, express or implied, including
* warranties of performance, merchantability or fitness for any particular
* purpose.
*
* Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Author: Eugene Vasilchenko
*
* File Description:
* Application for splitting blobs within ID1 cache
*
* ===========================================================================
*/

#include <ncbi_pch.hpp>
#include "split_cache.hpp"

#include <corelib/ncbistd.hpp>
#include <corelib/ncbiapp.hpp>
#include <corelib/ncbienv.hpp>
#include <corelib/ncbifile.hpp>
#include <corelib/ncbitime.hpp>
#include <corelib/ncbiargs.hpp>
#include <corelib/ncbistre.hpp>

#include <serial/objistr.hpp>
#include <serial/objostr.hpp>
#include <serial/serial.hpp>

// Objects includes
#include <objects/seq/Bioseq.hpp>
#include <objects/seq/Seq_annot.hpp>
#include <objects/seq/Seq_inst.hpp>
#include <objects/seq/Seq_ext.hpp>
#include <objects/seqloc/Seq_id.hpp>
#include <objects/seqloc/Seq_loc.hpp>
#include <objects/seqloc/Seq_interval.hpp>
#include <objects/seqset/Seq_entry.hpp>
#include <objects/seqset/Bioseq_set.hpp>
#include <objects/seqfeat/seqfeat__.hpp>
#include <objects/id2/ID2_Reply_Data.hpp>
#include <objects/seqsplit/ID2S_Split_Info.hpp>
#include <objects/seqsplit/ID2S_Chunk_Id.hpp>
#include <objects/seqsplit/ID2S_Chunk.hpp>

// Object manager includes
#include <objmgr/object_manager.hpp>
#include <objmgr/scope.hpp>
#include <objmgr/seq_vector.hpp>
#include <objmgr/seq_map.hpp>
#include <objmgr/seq_map_ci.hpp>
#include <objmgr/seq_descr_ci.hpp>
#include <objmgr/feat_ci.hpp>
#include <objmgr/graph_ci.hpp>
#include <objmgr/align_ci.hpp>
#include <objtools/data_loaders/genbank/gbloader.hpp>
#include <objmgr/bioseq_ci.hpp>
#include <objmgr/seq_annot_ci.hpp>
#include <objmgr/impl/synonyms.hpp>
#include <objmgr/impl/tse_info.hpp>
#include <objmgr/impl/tse_chunk_info.hpp>
#include <objmgr/impl/seq_annot_info.hpp>

// cache
#include <objtools/data_loaders/genbank/readers/id1/reader_id1_cache.hpp>
#include <bdb/bdb_blobcache.hpp>

#include <objmgr/split/blob_splitter.hpp>
#include <objmgr/split/id2_compress.hpp>

BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)

static const int kDefaultCacheBlobAge = 5; // keep objects for 5 days
static const int kDefaultCacheIdAge = 1; // keep id resolution for 1 day


/// Line-oriented logging helper bound to a CSplitCacheApp.
///
/// The output stream is obtained lazily from CSplitCacheApp::Info() on the
/// first Start() (directly, or through operator<< / the stream conversion).
/// End() -- also invoked by the destructor -- terminates the started line
/// with NcbiEndl and forgets the stream, so a later Start() asks the
/// application for the stream again.
class CLog
{
public:
    /// @param app  application whose Info() stream receives the output
    CLog(const CSplitCacheApp* app)
        : m_App(app),
          m_Stream(0)
        {
        }

    /// Finishes the current line, if one was started.
    ~CLog(void)
        {
            End();
        }

    /// Writes NcbiEndl to the active stream and detaches from it.
    /// Does nothing when no line has been started.
    void End(void)
        {
            if ( m_Stream ) {
                *m_Stream << NcbiEndl;
                m_Stream = 0;
            }
        }

    /// Returns the active stream, fetching it from the application first
    /// if no line is in progress.
    CNcbiOstream& Start(void)
        {
            if ( !m_Stream ) {
                m_Stream = &m_App->Info();
            }
            return *m_Stream;
        }

    /// Allows a CLog to be used where a CNcbiOstream& is expected.
    operator CNcbiOstream&(void)
        {
            return Start();
        }

    /// Temporary returned by CLog::operator<<; forwards further insertions
    /// to the wrapped stream and flushes that stream when destroyed.
    class CFlusher
    {
    public:
        CFlusher(CNcbiOstream& out)
            : m_Stream(out)
            {
            }

        ~CFlusher(void)
            {
                m_Stream.flush();
            }

        template<typename T>
        CFlusher& operator<<(const T& t)
            {
                m_Stream << t;
                return *this;
            }

    private:
        CNcbiOstream& m_Stream;
    };

    /// Starts a line if necessary, writes @a t and returns a CFlusher so
    /// that the stream is flushed once the chained expression is complete.
    template<typename T>
    CFlusher operator<<(const T& t)
        {
            Start() << t;
            return CFlusher(*m_Stream);
        }

private:
    const CSplitCacheApp* m_App;    // source of the output stream
    CNcbiOstream*         m_Stream; // 0 while no line is in progress
};


/// Initializes all option flags to false and the recursion level to 0.
CSplitCacheApp::CSplitCacheApp(void)
    : m_DumpAsnText(false),
      m_DumpAsnBinary(false),
      m_Resplit(false),
      m_Recurse(false),
      m_RecursionLevel(0)
{
}


CSplitCacheApp::~CSplitCacheApp(void)
{
}


/// Declares the command-line interface of the application: which entries
/// to process, where the cache lives, splitting parameters and debug dumps.
void CSplitCacheApp::Init(void)
{
    // Prepare command line descriptions
    //

    // Create
    auto_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);

    // set of entries to process
    arg_desc->AddOptionalKey("gi", "Gi",
                             "GI of the Seq-Entry to process",
                             CArgDescriptions::eInteger);
    arg_desc->AddOptionalKey("gi_list", "GiList",
                             "file with list of GIs to process",
                             CArgDescriptions::eInputFile);
    arg_desc->AddOptionalKey("id", "SeqId",
                             "Seq-id of the Seq-Entry to process",
                             CArgDescriptions::eString);
    arg_desc->AddOptionalKey("id_list", "SeqIdList",
                             "file with list of Seq-ids to process",
                             CArgDescriptions::eInputFile);
    arg_desc->AddFlag("all",
                      "process all entries in cache");
    arg_desc->AddFlag("recurse",
                      "process all entries referenced by specified ones");

    // cache parameters
    arg_desc->AddDefaultKey("cache_dir", "CacheDir",
                            "directory of GenBank cache",
                            CArgDescriptions::eInputFile,
                            ".genbank_cache");

    // split parameters
    // The default is shown in KB, hence the division of the byte constant.
    arg_desc->AddDefaultKey
        ("chunk_size", "ChunkSize",
         "approximate size of chunks to create (in KB)",
         CArgDescriptions::eInteger,
         NStr::IntToString(SSplitterParams::kDefaultChunkSize/1024));

    arg_desc->AddFlag("compress",
                      "try to compress split data");
    arg_desc->AddFlag("resplit",
                      "resplit already splitted data");

    // debug parameters
    arg_desc->AddFlag("dump",
                      "dump blobs in ASN.1 text format");
    arg_desc->AddFlag("bdump",
                      "dump blobs in ASN.1 binary format");

    // Program description
    string prog_description = "Example of the C++ object manager usage\n";
    arg_desc->SetUsageContext(GetArguments().GetProgramBasename(),
                              prog_description, false);

    // Pass argument descriptions to the application
    //
    SetupArgDescriptions(arg_desc.release());
}


/// Application entry point: raises the diagnostic level to eDiag_Info,
/// prepares the caches/loader/scope and runs the processing step.
///
/// @return always 0
int CSplitCacheApp::Run(void)
{
    SetDiagPostLevel(eDiag_Info);

    SetupCache();

    Process();

    return 0;
}


/// Opens the blob cache and the id cache in the configured directory and
/// builds the reader, data loader, object manager and scope on top of them.
///
/// Registry section "LOCAL_CACHE" supplies "Path" (fallback cache
/// directory), "Age" (blob age, days) and "IdAge" (id age, days).
void CSplitCacheApp::SetupCache(void)
{
    const CArgs& args = GetArgs();
    const CNcbiRegistry& reg = GetConfig();

    string cache_dir;
    {{ // set cache directory
        // NOTE(review): "cache_dir" is declared with AddDefaultKey in Init(),
        // so presumably it always has a value and the registry fallback is
        // never reached -- confirm before relying on "LOCAL_CACHE"/"Path".
        if ( args["cache_dir"] ) {
            cache_dir = args["cache_dir"].AsString();
        }
        else {
            cache_dir = reg.GetString("LOCAL_CACHE", "Path",
                                      cache_dir, CNcbiRegistry::eErrPost);
        }
        if ( cache_dir.empty() ) {
            ERR_POST(Fatal << "empty cache directory name");
        }
    }}

    {{ // create cache directory
        LINE("cache directory is \"" << cache_dir << "\"");
        {{
            // make sure our cache directory exists first
            CDir dir(cache_dir);
            if ( !dir.Exists() ) {
                // Report a failed creation here instead of letting it show
                // up later as a cache-open failure.  The second Exists()
                // check tolerates the directory having been created by
                // someone else in the meantime.
                if ( !dir.Create()  &&  !dir.Exists() ) {
                    ERR_POST(Fatal << "cannot create cache directory \"" <<
                             cache_dir << "\"");
                }
            }
        }}
    }}

    {{ // blob cache
        CBDB_Cache* cache;
        m_Cache.reset(cache = new CBDB_Cache());

        int blob_age = reg.GetInt("LOCAL_CACHE", "Age", kDefaultCacheBlobAge,
                                  CNcbiRegistry::eErrPost);

        // Cache cleaning
        // Objects age is given in days.  As written, a non-positive value
        // does not disable cleaning: it is replaced by the default age.
        if ( blob_age <= 0 ) {
            blob_age = kDefaultCacheBlobAge;
        }

        ICache::TTimeStampFlags flags =
            ICache::fTimeStampOnRead |
            ICache::fExpireLeastFrequentlyUsed |
            ICache::fPurgeOnStartup;
        // days -> seconds
        cache->SetTimeStampPolicy(flags, blob_age*24*60*60);

        cache->Open(cache_dir.c_str(), "blobs");

        // purge old blobs: everything older than (now - blob_age days)
        CTime time_stamp(CTime::eCurrent);
        time_t age = time_stamp.GetTimeT();
        age -= 60 * 60 * 24 * blob_age;
        cache->Purge(age);
    }}

    {{ // set cache id age
        CBDB_Cache* cache;
        m_IdCache.reset(cache = new CBDB_Cache());

        int id_age = reg.GetInt("LOCAL_CACHE", "IdAge", kDefaultCacheIdAge,
                                CNcbiRegistry::eErrPost);

        // non-positive values fall back to the default id age
        if ( id_age <= 0 ) {
            id_age = kDefaultCacheIdAge;
        }

        ICache::TTimeStampFlags flags =
            ICache::fTimeStampOnCreate|
            ICache::fCheckExpirationAlways;
        // days -> seconds
        cache->SetTimeStampPolicy(flags, id_age*24*60*60);

        cache->Open(cache_dir.c_str(), "ids");
    }}

    {{ // create loader
        m_Reader = new CCachedId1Reader(1, &*m_Cache, &*m_IdCache);
        m_Loader.Reset(new CGBDataLoader("GenBank", m_Reader));
    }}

    {{ // create object manager
        m_ObjMgr.Reset(new CObjectManager);
        m_ObjMgr->RegisterDataLoader(*m_Loader, CObjectManager::eDefault);
    }}

    {{ // Create scope
        m_Scope.Reset(new CScope(*m_ObjMgr));
        m_Scope->AddDefaults();
    }}
}


/// Writes one indentation string to NcbiCout per current recursion level
/// and returns NcbiCout so the caller can continue the line.
CNcbiOstream& CSplitCacheApp::Info(void) const
{
    for ( size_t i = 0; i < m_RecursionLevel; ++i ) {
        NcbiCout << " ";
    }
    return NcbiCout;
}


/// Serializes an object as ASN.1 binary into memory and packs the result
/// into a CID2_Reply_Data via CId2Compressor::Compress().
///
/// NOTE: only the public part of this class is visible in this portion of
/// the file; the member declarations follow the `private:` label below.
class CSplitDataMaker
{
public:
    /// @param params     splitter parameters (compression setting is read
    ///                   from m_Compression)
    /// @param data_type  value stored as the reply's data type by operator<<
    CSplitDataMaker(const SSplitterParams& params,
                    int data_type)
        : m_Params(params),
          m_DataType(data_type)
        {
        }

    /// Serializes @a obj and stores it in the reply data, replacing any
    /// previous content.
    template<class C>
    void operator<<(const C& obj)
        {
            OpenDataStream() << obj;
            CloseDataStream(m_DataType);
        }

    /// Discards any previous streams and opens an ASN.1 binary object
    /// output stream on top of a fresh in-memory stream.
    CObjectOStream& OpenDataStream(void)
        {
            m_OStream.reset();
            m_MStream.reset(new CNcbiOstrstream);
            m_OStream.reset(CObjectOStream::Open(eSerial_AsnBinary,
                                                 *m_MStream));
            return *m_OStream;
        }

    /// Moves the bytes written since OpenDataStream() into the reply data.
    ///
    /// @param data_type  value stored as the reply's data type
    void CloseDataStream(int data_type)
        {
            m_Data.Reset();
            m_Data.SetData_type(data_type);
            m_Data.SetData_format(eSerial_AsnBinary);
            m_Data.SetData_compression(m_Params.m_Compression);

            // The object stream is destroyed before the buffer is read --
            // presumably so that all pending output reaches m_MStream
            // (TODO confirm).
            m_OStream.reset();
            size_t size = m_MStream->pcount();
            const char* data = m_MStream->str();
            // str() is followed by freeze(false) so the stream keeps
            // responsibility for releasing its buffer.
            m_MStream->freeze(false);
            CId2Compressor::Compress(m_Params, m_Data.SetData(), data, size);
            m_MStream.reset();
        }

    /// @return the reply data produced by the last CloseDataStream()
    const CID2_Reply_Data& GetData(void) const
        {
            return m_Data;
        }

private:
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?