split_cache.cpp
来自「ncbi源码」· C++ 代码 · 共 827 行 · 第 1/2 页
CPP
827 行
SSplitterParams m_Params; CID2_Reply_Data m_Data; int m_DataType; AutoPtr<CNcbiOstrstream> m_MStream; AutoPtr<CObjectOStream> m_OStream;};string CSplitCacheApp::GetFileName(const string& key, const string& suffix, const string& ext){ string dir = key; string file = key + suffix; if ( !suffix.empty() && !ext.empty() ) { dir = dir + CDirEntry::GetPathSeparator() + ext; } CDir(dir).CreatePath(); return CDirEntry::MakePath(dir, file, ext);}void CSplitCacheApp::Process(void){ const CArgs& args = GetArgs(); m_DumpAsnText = args["dump"]; m_DumpAsnBinary = args["bdump"]; if ( args["compress"] ) { m_SplitterParams.m_Compression = m_SplitterParams.eCompression_nlm_zip; } m_Resplit = args["resplit"]; m_Recurse = args["recurse"]; m_SplitterParams.SetChunkSize(args["chunk_size"].AsInteger()*1024); if ( args["gi"] ) { ProcessGi(args["gi"].AsInteger()); } if ( args["gi_list"] ) { CNcbiIstream& in = args["gi_list"].AsInputFile(); int gi; while ( in >> gi ) { ProcessGi(gi); } } if ( args["id"] ) { CSeq_id id(args["id"].AsString()); ProcessSeqId(id); } if ( args["id_list"] ) { CNcbiIstream& in = args["id_list"].AsInputFile(); string id_name; while ( NcbiGetline(in, id_name, '\n') ) { CSeq_id id(id_name); ProcessSeqId(id); } }}void CSplitCacheApp::ProcessGi(int gi){ CSeq_id id; id.SetGi(gi); ProcessSeqId(id);}void CSplitCacheApp::ProcessSeqId(const CSeq_id& id){ CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(id); if ( !m_ProcessedIds.insert(idh).second ) { // already processed return; } if ( m_RecursionLevel == 0 ) { m_Scope->ResetHistory(); } LINE("Processing: " << id.AsFastaString()); {{ CLevelGuard level(m_RecursionLevel); CId1Reader::TSeqrefs srs; m_Reader->ResolveSeq_id(srs, id, 0); if ( srs.empty() ) { LINE("Skipping: no blobs"); return; } ITERATE ( CId1Reader::TSeqrefs, it, srs ) { ProcessBlob(**it); } if ( m_Recurse ) { LINE("Processing referenced sequences:"); CBioseq_Handle bh = m_Scope->GetBioseqHandle(idh); if ( bh ) { CSeqMap_CI it = bh.GetSeqMap() .begin_resolved(&*m_Scope, 0, CSeqMap::fFindRef); while ( it ) { ProcessSeqId(*it.GetRefSeqid().GetSeqId()); ++it; } } } }} LINE("End of processing: " << id.AsFastaString());}template<class C>void Dump(CSplitCacheApp* app, const C& obj, ESerialDataFormat format, const string& key, const string& suffix = kEmptyStr){ string ext; switch ( format ) { case eSerial_AsnText: ext = "asn"; break; case eSerial_AsnBinary: ext = "asb"; break; case eSerial_Xml: ext = "xml"; break; default: ext = "asn"; break; } string file_name = app->GetFileName(key, suffix, ext); WAIT_LINE4(app) << "Dumping to " << file_name << " ..."; AutoPtr<CObjectOStream> out(CObjectOStream::Open(file_name, format)); *out << obj;}enum EDataType{ eDataType_MainBlob = 0, eDataType_SplitInfo = 1, eDataType_Chunk = 2};template<class C>void DumpData(CSplitCacheApp* app, const C& obj, EDataType data_type, const string& key, const string& suffix = kEmptyStr){ string file_name = app->GetFileName(key, suffix, "bin"); WAIT_LINE4(app) << "Storing to " << file_name << " ..."; CSplitDataMaker data(app->GetParams(), data_type); data << obj; AutoPtr<CObjectOStream> out (CObjectOStream::Open(file_name, eSerial_AsnBinary)); *out << data.GetData();}template<class C>void StoreToCache(CSplitCacheApp* app, const C& obj, EDataType data_type, const CSeqref& seqref, const string& suffix = kEmptyStr){ string key = app->GetReader().GetBlobKey(seqref); WAIT_LINE4(app) << "Storing to cache " << key << " ..."; CNcbiOstrstream stream; {{ CSplitDataMaker data(app->GetParams(), data_type); data << obj; AutoPtr<CObjectOStream> out (CObjectOStream::Open(eSerial_AsnBinary, stream)); *out << data.GetData(); }} size_t size = stream.pcount(); line << setiosflags(ios::fixed) << setprecision(2) << " " << setw(7) << (size/1024.0) << " KB"; const char* data = stream.str(); stream.freeze(false); app->GetCache().Store(key, seqref.GetVersion(), suffix, data, size);}void CSplitCacheApp::ProcessBlob(const CSeqref& seqref){ { pair<int, int> key = seqref.GetKeyByTSE(); pair<TProcessedBlobs::iterator, bool> ins = m_ProcessedBlobs.insert(key); if ( !ins.second ) { // already processed return; } } LINE("Processing blob "<< seqref.printTSE()); CLevelGuard level(m_RecursionLevel); int version = m_Reader->GetVersion(seqref, 0); if ( version > 1 ) { CTime time(time_t(version*60)); LINE("Blob version: " << version << " - " << time.AsString()); } else { LINE("Blob version: " << version); } string blob_key = m_Reader->GetBlobKey(seqref); if ( m_Cache->GetSize(blob_key, version, "Skeleton") ) { if ( m_Resplit ) { WAIT_LINE << "Removing old split data..."; m_Cache->Remove(blob_key); } else { LINE("Already splitted: skipping"); return; } } if ( m_Reader->IsSNPSeqref(seqref) ) { LINE("Skipping SNP blob: not implemented"); return; } CBioseq_Handle bh; {{ WAIT_LINE << "Loading..."; bh = m_Scope->GetBioseqHandle (CSeq_id_Handle::GetGiHandle(seqref.GetGi())); }} if ( !bh ) { LINE("Skipping: no bioseq???"); return; } CConstRef<CSeq_entry> seq_entry; if ( !m_Reader->IsSNPSeqref(seqref) ) { // get non-SNP blob seq_entry = bh.GetTopLevelEntry().GetCompleteSeq_entry(); } else { LINE("Skipping SNP blob: not implemented"); return; /* SAnnotSelector sel; sel.SetMaxSize(1); sel.SetFeatSubtype(CSeqFeatData::eSubtype_variation); CFeat_CI it(bh, 0, 0, sel); if ( !it ) { LINE("Skipping SNP blob: empty"); return; } const CSeq_annot& seq_annot = it.GetSeq_annot(); CConstRef<CSeq_annot_Info> seq_annot_info = m_Scope->GetImpl().x_GetSeq_annot_Info(seq_annot); blob.Reset(&seq_annot_info->GetTSE_Info()); */ } if ( m_DumpAsnText ) { Dump(this, *seq_entry, eSerial_AsnText, blob_key); } if ( m_DumpAsnBinary ) { Dump(this, *seq_entry, eSerial_AsnBinary, blob_key); } size_t blob_size = m_Cache->GetSize(m_Reader->GetBlobKey(seqref), version, m_Reader->GetSeqEntrySubkey()); if ( blob_size == 0 ) { LINE("Skipping: blob is not in cache"); return; } if ( blob_size <= m_SplitterParams.m_MaxChunkSize ) { LINE("Skipping: blob is small enough: " << blob_size); return; } LINE("Blob size: " << blob_size); CBlobSplitter splitter(m_SplitterParams); if ( !splitter.Split(*seq_entry) ) { LINE("Skipping: no chunks after splitting"); return; } const CSplitBlob& blob = splitter.GetBlob(); if ( m_DumpAsnText ) { Dump(this, blob.GetMainBlob(), eSerial_AsnText, blob_key, "-main"); Dump(this, blob.GetSplitInfo(), eSerial_AsnText, blob_key, "-split"); ITERATE ( CSplitBlob::TChunks, it, blob.GetChunks() ) { string suffix = "-chunk-" + NStr::IntToString(it->first); Dump(this, *it->second, eSerial_AsnText, blob_key, suffix); } } if ( m_DumpAsnBinary ) { Dump(this, blob.GetMainBlob(), eSerial_AsnBinary, blob_key, "-main"); Dump(this, blob.GetSplitInfo(), eSerial_AsnBinary, blob_key, "-split"); ITERATE ( CSplitBlob::TChunks, it, blob.GetChunks() ) { string suffix = "-chunk-" + NStr::IntToString(it->first); Dump(this, *it->second, eSerial_AsnBinary, blob_key, suffix); } } {{ // storing split data DumpData(this, blob.GetMainBlob(), eDataType_MainBlob, blob_key, "-main"); DumpData(this, blob.GetSplitInfo(), eDataType_SplitInfo, blob_key, "-split"); ITERATE ( CSplitBlob::TChunks, it, blob.GetChunks() ) { string suffix = "-chunk-" + NStr::IntToString(it->first); DumpData(this, *it->second, eDataType_Chunk, blob_key, suffix); } }} {{ // storing split data into cache StoreToCache(this, blob.GetMainBlob(), eDataType_MainBlob, seqref, m_Reader->GetSkeletonSubkey()); StoreToCache(this, blob.GetSplitInfo(), eDataType_SplitInfo, seqref, m_Reader->GetSplitInfoSubkey()); ITERATE ( CSplitBlob::TChunks, it, blob.GetChunks() ) { StoreToCache(this, *it->second, eDataType_Chunk, seqref, m_Reader->GetChunkSubkey(it->first)); } }}}CConstRef<CSeqref> CSplitCacheApp::GetSeqref(CBioseq_Handle bh){ CSeq_entry_Handle tse = bh.GetTopLevelEntry(); CConstRef<CObject> id = tse.GetBlobId(); return ConstRef(dynamic_cast<const CSeqref*>(id.GetPointer()));}END_SCOPE(objects)END_NCBI_SCOPE/////////////////////////////////////////////////////////////////////////////// MAINint main(int argc, const char* argv[]){ return ncbi::objects::CSplitCacheApp().AppMain(argc, argv);}/** ---------------------------------------------------------------------------* $Log: split_cache.cpp,v $* Revision 1000.2 2004/06/01 19:42:02 gouriano* PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.19** Revision 1.19 2004/05/21 21:42:52 gorelenk* Added PCH ncbi_pch.hpp** Revision 1.18 2004/04/28 17:06:26 vasilche* Load split blobs from new ICache.** Revision 1.17 2004/04/28 16:29:15 vasilche* Store split results into new ICache.** Revision 1.16 2004/03/16 16:03:11 vasilche* Removed Windows EOL.** Revision 1.15 2004/03/16 15:47:29 vasilche* Added CBioseq_set_Handle and set of EditHandles** Revision 1.14 2004/02/09 19:18:55 grichenk* Renamed CDesc_CI to CSeq_descr_CI. Redesigned CSeq_descr_CI* and CSeqdesc_CI to avoid using data directly.** Revision 1.13 2004/01/22 20:10:37 vasilche* 1. Splitted ID2 specs to two parts.* ID2 now specifies only protocol.* Specification of ID2 split data is moved to seqsplit ASN module.* For now they are still reside in one resulting library as before - libid2.* As the result split specific headers are now in objects/seqsplit.* 2. Moved ID2 and ID1 specific code out of object manager.* Protocol is processed by corresponding readers.* ID2 split parsing is processed by ncbi_xreader library - used by all readers.* 3. Updated OBJMGR_LIBS correspondingly.** Revision 1.12 2004/01/13 16:55:57 vasilche* CReader, CSeqref and some more classes moved from xobjmgr to separate lib.* Headers moved from include/objmgr to include/objtools/data_loaders/genbank.** Revision 1.11 2004/01/07 17:37:37 vasilche* Fixed include path to genbank loader.* Moved split_cache application.** Revision 1.10 2003/12/30 16:06:15 vasilche* Compression methods moved to separate header: id2_compress.hpp.** Revision 1.9 2003/12/03 19:30:45 kuznets* Misprint fixed** Revision 1.8 2003/12/02 23:46:20 vasilche* Fixed INTERNAL COMPILER ERROR on MSVC - splitted expression.** Revision 1.7 2003/12/02 23:24:33 vasilche* Added "-recurse" option to split all sequences referenced by SeqMap.** Revision 1.6 2003/12/02 19:59:15 vasilche* Added GetFileName() declaration.** Revision 1.5 2003/12/02 19:12:24 vasilche* Fixed compilation on MSVC.** Revision 1.4 2003/11/28 20:27:44 vasilche* Correctly print log lines in LINE macro.** Revision 1.3 2003/11/26 23:05:00 vasilche* Removed extra semicolons after BEGIN_SCOPE and END_SCOPE.** Revision 1.2 2003/11/26 17:56:03 vasilche* Implemented ID2 split in ID1 cache.* Fixed loading of splitted annotations.** Revision 1.1 2003/11/12 16:18:32 vasilche* First implementation of ID2 blob splitter withing cache.** ===========================================================================*/
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?