bdbloader.cpp
来自「ncbi源码」· C++ 代码 · 共 229 行
CPP
229 行
/*
 * ===========================================================================
 * PRODUCTION $Log: bdbloader.cpp,v $
 * PRODUCTION Revision 1000.1  2004/06/01 19:41:28  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.4
 * PRODUCTION
 * ===========================================================================
 */
/*  $Id: bdbloader.cpp,v 1000.1 2004/06/01 19:41:28 gouriano Exp $
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
*  Author: Christiam Camacho
*
*  File Description:
*   Data loader implementation that uses the blast databases
*
* ===========================================================================
*/

#include <ncbi_pch.hpp>
#include <objtools/data_loaders/blastdb/bdbloader.hpp>
#include <objmgr/impl/handle_range_map.hpp>
#include <ctools/asn_converter.hpp>
#include <objmgr/seq_id_handle.hpp>
#include <objmgr/impl/data_source.hpp>
#include <objects/seq/Bioseq.hpp>
#include <objects/seqloc/Seq_id.hpp>
#include <objects/seqset/Seq_entry.hpp>
#include <objmgr/util/sequence.hpp>

//=======================================================================
// BlastDbDataLoader Public interface
//

BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)

// Construct a loader for the BLAST database `dbname` of type `dbtype`.
// The database itself is not opened here: m_rdfp starts out null and is
// filled in by the first call to GetRecords().
CBlastDbDataLoader::CBlastDbDataLoader(const string&  loader_name,
                                       const string&  dbname,
                                       const EDbType  dbtype)
    : CDataLoader(loader_name),
      m_dbname(dbname),
      m_dbtype(dbtype),
      m_rdfp(0)
{
    m_mutex = new CFastMutex();
}

// Close the BLAST database handle (if one was opened) and release the mutex.
CBlastDbDataLoader::~CBlastDbDataLoader(void)
{
    if (m_rdfp) {
        CFastMutexGuard mtx(*m_mutex);
        if (m_rdfp) {
            m_rdfp = readdb_destruct(m_rdfp);
        }
    }
    delete m_mutex;
}

// Look up the sequence identified by `idh` in the BLAST database and, if it
// is found and has not been loaded before, add it to the data source as a
// new TSE and remember it in m_cache.
//
// Only eBioseq and eBioseqCore are handled; any other `choice` is logged as
// invalid and ignored.
//
// TODO Note that the ranges are ignored for right now
// How to handle other choices?
void
CBlastDbDataLoader::GetRecords(const CSeq_id_Handle& idh,
                               const EChoice         choice)
{
    //LOG_POST("***CBlastDbDataLoader::GetRecords***");

    // only eBioseq and eBioseqCore are supported
    switch (choice) {
    case eBioseq:
    case eBioseqCore:
        break;

    case eBlob:
    case eCore:
    case eSequence:
    case eFeatures:
    case eGraph:
    case eAll:
    default:
        LOG_POST("Invalid choice: " + NStr::IntToString(choice));
        return;
    }

    // Open the blast database if it hasn't been accessed yet
    if (!m_rdfp) {
        CFastMutexGuard mtx(*m_mutex);
        if (!m_rdfp) {
            // readdb_new_ex2 is handed a writable copy of the name, which
            // is released again as soon as the call returns.
            char* tmp = strdup(m_dbname.c_str());
            if (tmp) {
                m_rdfp = readdb_new_ex2(tmp, static_cast<int>(m_dbtype),
                                        READDB_NEW_INDEX |
                                        READDB_NEW_DO_TAXDB,
                                        NULL, NULL);
                free(tmp);
            }
        }
    }

    // NOTE(review): readdb_new_ex2 is assumed to return NULL on failure.
    // Without this guard a failed open would pass a null handle to every
    // readdb call below.
    if (!m_rdfp) {
        LOG_POST("Unable to open BLAST database: " + m_dbname);
        return;
    }

    // Look the seqid up in the db and add the matching Bioseq to the data
    // source
    {{
        DECLARE_ASN_CONVERTER(CSeq_id, SeqId, sic);
        DECLARE_ASN_CONVERTER(CBioseq, Bioseq, bc);

        CConstRef<CSeq_id> seq_id = idh.GetSeqId();

        // C-toolkit copy of the requested id; this function frees it.
        SeqIdPtr sip = sic.ToC(*seq_id);
        if ( !sip ) {
            return;
        }

        BioseqPtr bsp        = NULL;
        SeqIdPtr  all_seqids = NULL;
        int       oid        = -1;

        {{
            // protect access to the blast database and to the cache
            CFastMutexGuard mtx(*m_mutex);

            oid = SeqId2OrdinalId(m_rdfp, sip);

            // The C seqid is only needed for the lookup above.  Free it
            // here so that none of the early returns below leak it.
            sip = SeqIdFree(sip);

            if (oid < 0) {
                return;
            }

            // If we've already retrieved this particular ordinal id,
            // ignore it
            if (m_cache.find(oid) != m_cache.end()) {
                return;
            }

            bsp = readdb_get_bioseq(m_rdfp, oid);
            if ( !bsp ) {
                // Nothing to convert; bail out rather than dereference
                // a null Bioseq below.
                return;
            }

            // Retrieve multiple seqids if there are any
            unsigned int index   = 0;
            SeqIdPtr     sip_tmp = NULL;
            while (readdb_get_header(m_rdfp, oid, &index, &sip_tmp, NULL)) {
                if ( !all_seqids ) {
                    all_seqids = sip_tmp;
                } else {
                    // Append to the end of the chain collected so far
                    SeqIdPtr sip_itr = all_seqids;
                    while (sip_itr->next) {
                        sip_itr = sip_itr->next;
                    }
                    sip_itr->next = sip_tmp;
                }
            }
        }}

        // Replace the Bioseq's own id list with the full set from the
        // database headers, when there is one.
        if (all_seqids) {
            SeqIdSetFree(bsp->id);
            bsp->id = all_seqids;
        }

        CRef<CBioseq> bsr(bc.FromC(bsp));

        // The C object is no longer needed once the C++ object exists;
        // release it before calling into the data source.
        BioseqFree(bsp);

        CRef<CSeq_entry> ser(new CSeq_entry());
        ser->Select(CSeq_entry::e_Seq);
        ser->SetSeq(*bsr);
        GetDataSource()->AddTSE(*ser);

        {{
            // The cache is read under the mutex above, so write it under
            // the same mutex.
            CFastMutexGuard mtx(*m_mutex);
            m_cache[oid] = bsr;
        }}
    }}
}

// Dump the loader's database name, database type and database handle to the
// supplied debug context.
void
CBlastDbDataLoader::DebugDump(CDebugDumpContext ddc, unsigned int depth) const
{
    //LOG_POST("CBlastDbDataLoader::DebugDump\n");
    // Frame is labelled with this class's own name (was "CGBLoader").
    ddc.SetFrame("CBlastDbDataLoader");
    // CObject::DebugDump( ddc, depth);
    DebugDumpValue(ddc,"m_dbname", m_dbname);
    DebugDumpValue(ddc,"m_dbtype", m_dbtype);
    DebugDumpValue(ddc,"m_rdfp", m_rdfp);
}

END_SCOPE(objects)
END_NCBI_SCOPE

/* ==========================================================================
 *
 * $Log: bdbloader.cpp,v $
 * Revision 1000.1  2004/06/01 19:41:28  gouriano
 * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.4
 *
 * Revision 1.4  2004/05/21 21:42:51  gorelenk
 * Added PCH ncbi_pch.hpp
 *
 * Revision 1.3  2003/09/30 17:22:06  vasilche
 * Fixed for new CDataLoader interface.
 *
 * Revision 1.2  2003/09/30 16:36:36  vasilche
 * Updated CDataLoader interface.
 *
 * Revision 1.1  2003/08/06 16:15:18  jianye
 * Add BLAST DB loader.
 *
 * Revision 1.7  2003/05/19 21:11:46  camacho
 * Added caching
 *
 * Revision 1.6  2003/05/16 14:27:48  camacho
 * Proper use of namespaces
 *
 * Revision 1.5  2003/05/15 15:58:28  camacho
 * Minor changes
 *
 * Revision 1.4  2003/05/08 15:11:43  camacho
 * Changed prototype for GetRecords in base class
 *
 * Revision 1.3  2003/03/21 17:42:54  camacho
 * Added loading of taxonomy info
 *
 * Revision 1.2  2003/03/18 21:19:26  camacho
 * Retrieve all seqids if available
 *
 * Revision 1.1  2003/03/14 22:37:26  camacho
 * Initial revision
 *
 *
 * ========================================================================== */
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?