lds_query.cpp

来自「ncbi源码」· C++ 代码 · 共 456 行

CPP
456
字号
/*
 * ===========================================================================
 * PRODUCTION $Log: lds_query.cpp,v $
 * PRODUCTION Revision 1000.2  2004/06/01 19:45:50  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.12
 * PRODUCTION
 * ===========================================================================
 */

/*  $Id: lds_query.cpp,v 1000.2 2004/06/01 19:45:50 gouriano Exp $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Author: Anatoliy Kuznetsov
 *
 * File Description:  Different query functions to LDS database.
 *
 */

#include <ncbi_pch.hpp>
#include <corelib/ncbistr.hpp>

#include <bdb/bdb_cursor.hpp>
#include <bdb/bdb_util.hpp>
#include <bdb/bdb_query.hpp>
#include <bdb/bdb_query_parser.hpp>

#include <objects/seqloc/Seq_id.hpp>
#include <objects/general/Object_id.hpp>

#include <objtools/lds/lds_query.hpp>
#include <objtools/lds/lds_set.hpp>
#include <objtools/lds/lds_expt.hpp>

#include <vector>

BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)


/// Base class for sequence search functors.
///
/// Holds the search criteria (a list of sequence id strings) and the
/// result set that derived functors fill in, and provides the shared
/// id comparison routine MatchSeqId().
///
/// @internal
class CLDS_FindSeqIdBase
{
public:
    /// @param seqids
    ///   Sequence id strings to search for. Stored by reference, so the
    ///   vector must outlive this object.
    /// @param obj_ids
    ///   Result set receiving the ids of matching records (asserted
    ///   non-NULL).
    CLDS_FindSeqIdBase(const vector<string>&  seqids,
                       CLDS_Set*              obj_ids)
    :
      m_SeqIds(seqids),
      m_ResultSet(obj_ids)
    {
        _ASSERT(obj_ids);
    }

    /// Compare a sequence id taken from the database with a candidate
    /// id given as a string.
    ///
    /// The candidate is first interpreted by the CSeq_id string
    /// constructor; if that leaves the id unset it is turned into a
    /// local id. When the comparison fails for a non-local candidate
    /// that contains no '|' character, the candidate is retried as a
    /// local id.
    ///
    /// @param seq_id_db
    ///   Sequence id built from a database record.
    /// @param candidate_str
    ///   Sequence id string from the search criteria.
    /// @return
    ///   true if the candidate matches seq_id_db, false otherwise.
    bool MatchSeqId(const CSeq_id& seq_id_db, const string& candidate_str)
    {
        CSeq_id seq_id(candidate_str);
        if (seq_id.Which() == CSeq_id::e_not_set) {
            // String was not recognized: treat it as a local id
            seq_id.SetLocal().SetStr(candidate_str);
            if (seq_id.Which() == CSeq_id::e_not_set) {
                return false;
            }
        }
        if (seq_id.Match(seq_id_db)) {
            return true;
        }

        // Sequence does not match, lets try "force it local" strategy
        //
        if (seq_id.Which() != CSeq_id::e_Local) {
            // Only strings without the '|' separator are retried as local
            if (candidate_str.find('|') == NPOS) {
                seq_id.SetLocal().SetStr(candidate_str);
                if (seq_id.Which() != CSeq_id::e_Local) {
                    return false;
                }
                if (seq_id.Match(seq_id_db)) {
                    return true;
                }
            }
        }
        return false;
    }

private:
    // Copying is prohibited (declared, not defined)
    CLDS_FindSeqIdBase(const CLDS_FindSeqIdBase&);
    CLDS_FindSeqIdBase& operator=(const CLDS_FindSeqIdBase&);

protected:
    const vector<string>&   m_SeqIds;    // Search criteria
    CLDS_Set*               m_ResultSet; // Search result
};


/// Functor used for scanning the Berkeley DB database.
/// This functor is driven by the BDB_iterate_file algorithm,
/// checks every object record to determine if it contains
/// objects(molecules) satisfying the given set of ids.
///
/// @internal
class CLDS_FindSeqIdFunctor : public CLDS_FindSeqIdBase
{
public:
    /// @param db
    ///   The LDS tables collection. Currently it is only referenced by
    ///   the disabled (commented out) code in operator().
    /// @param seqids
    ///   Sequence id strings to search for.
    /// @param obj_ids
    ///   Result set receiving ids of matching objects.
    CLDS_FindSeqIdFunctor(SLDS_TablesCollection& db,
                          const vector<string>&  seqids,
                          CLDS_Set*              obj_ids)
    : CLDS_FindSeqIdBase(seqids, obj_ids),
      m_db(db)
    {
    }

    /// Examine one object record.
    ///
    /// The record's primary sequence id is checked first, then every
    /// space separated id from the record's seq_ids field. On the first
    /// hit the record's TSE object id (or, if that is zero, its own
    /// object id) is inserted into the result set.
    void operator()(SLDS_ObjectDB& dbf)
    {
        if (dbf.primary_seqid.IsEmpty())
            return;

        int object_id = dbf.object_id;
        int tse_id = dbf.TSE_object_id;

        string seq_id_str(dbf.primary_seqid);
        if (seq_id_str.empty())
            return;

        CSeq_id seq_id_db(seq_id_str);
        if (seq_id_db.Which() == CSeq_id::e_not_set) {
            // Unrecognized id string: treat it as a local id
            seq_id_db.SetLocal().SetStr(seq_id_str);
            if (seq_id_db.Which() == CSeq_id::e_not_set) {
                return;
            }
        }

        // Check the seqids vector against the primary seq id
        //
        ITERATE (vector<string>, it, m_SeqIds) {
            if (MatchSeqId(seq_id_db, *it)) {
                m_ResultSet->insert(tse_id ? tse_id : object_id);
                return;
            }
        }

        // Primary seq id gave no hit. Scanning the supplemental list (attributes)
        //
/*
        m_db.object_attr_db.object_attr_id = object_id;
        if (m_db.object_attr_db.Fetch() != eBDB_Ok) {
            return;
        }
*/
        if (dbf.seq_ids.IsNull() ||
            dbf.seq_ids.IsEmpty()) {
            return;
        }

        string attr_seq_ids(dbf.seq_ids);
        vector<string> seq_id_arr;

        // Supplemental ids are stored as one space separated string
        NStr::Tokenize(attr_seq_ids, " ", seq_id_arr, NStr::eMergeDelims);

        ITERATE (vector<string>, it, seq_id_arr) {
            CSeq_id seq_id_db(*it);
            if (seq_id_db.Which() == CSeq_id::e_not_set) {
                seq_id_db.SetLocal().SetStr(*it);
                if (seq_id_db.Which() == CSeq_id::e_not_set) {
                    continue;
                }
            }

            ITERATE (vector<string>, it2, m_SeqIds) {
                if (MatchSeqId(seq_id_db, *it2)) {
                    m_ResultSet->insert(tse_id ? tse_id : object_id);
                    return;
                }
            }
        }
    }

private:
    // Copying is prohibited (declared, not defined)
    CLDS_FindSeqIdFunctor(const CLDS_FindSeqIdFunctor&);
    CLDS_FindSeqIdFunctor& operator=(const CLDS_FindSeqIdFunctor&);

private:
    SLDS_TablesCollection&  m_db;        // The LDS database
};


///
/// Functor used for scanning the SLDS_SeqId_List database.
///
/// @internal
class CLDS_FindSeqIdListFunctor : public CLDS_FindSeqIdBase
{
public:
    /// @param seqids
    ///   Sequence id strings to search for.
    /// @param obj_ids
    ///   Result set receiving object ids of matching records.
    CLDS_FindSeqIdListFunctor(const vector<string>&  seqids,
                              CLDS_Set*  obj_ids)
    : CLDS_FindSeqIdBase(seqids, obj_ids)
    {
    }

    /// Examine one sequence id list record; when its seq_id matches any
    /// of the search criteria, insert the record's object_id into the
    /// result set.
    void operator()(SLDS_SeqId_List& dbf)
    {
        if (dbf.seq_id.IsEmpty())
            return;

        const char* str_id = dbf.seq_id;

        CSeq_id seq_id_db(str_id);
        if (seq_id_db.Which() == CSeq_id::e_not_set) {
            // Unrecognized id string: treat it as a local id
            seq_id_db.SetLocal().SetStr((const char*)dbf.seq_id);
            if (seq_id_db.Which() == CSeq_id::e_not_set) {
                return;
            }
        }

        int object_id = dbf.object_id;

        ITERATE (vector<string>, it, m_SeqIds) {
            if (MatchSeqId(seq_id_db, *it)) {
                m_ResultSet->insert(object_id);
                return;
            }
        }
    }
};


/// Reverse lookup in a type name to type id map.
///
/// @param type_map
///   Map of type names to integer type ids.
/// @param type
///   Type id to look for.
/// @return
///   Name of the first entry (in map iteration order) whose id equals
///   type, or kEmptyStr when there is no such entry.
inline
string LDS_TypeMapSearch(const map<string, int>& type_map, int type)
{
    typedef map<string, int> TName2Id;
    ITERATE (TName2Id, it, type_map) {
        if (it->second == type) {
            return it->first;
        }
    }
    return kEmptyStr;
}


/// Query scanner functor for objects
///
/// Collects the row id of every record reported through OnRecordFound().
///
/// @internal
class CLDS_IdTableScanner : public CBDB_FileScanner
{
public:
    /// @param dbf
    ///   File to scan.
    /// @param rec_ids
    ///   Result set for the found row ids. It is dereferenced here, so
    ///   it must not be NULL.
    CLDS_IdTableScanner(CBDB_File& dbf, CLDS_Set* rec_ids)
    : CBDB_FileScanner(dbf),
      m_ResultSet(*rec_ids)
    {}

    /// Store the row id of the current record (zero row ids are skipped)
    /// and ask the scanner to continue.
    virtual EScanAction OnRecordFound()
    {
        int rowid = BDB_get_rowid(m_File);
        if (rowid) {
            m_ResultSet.insert(rowid);
        }
        return eContinue;
    }

protected:
    CLDS_Set&   m_ResultSet;
};


////////////////////////////////////////////////////////////////////
//
// CLDS_Query
//

/// Check whether the file table has a record whose file name equals path.
///
/// Walks the file table with a cursor and compares every file name
/// with the given path.
///
/// @return
///   true if a record with exactly this file name exists.
bool CLDS_Query::FindFile(const string& path)
{
    CBDB_FileCursor cur(m_db.file_db);
    cur.SetCondition(CBDB_FileCursor::eFirst);
    while (cur.Fetch() == eBDB_Ok) {
        string fname(m_db.file_db.file_name);
        if (fname == path) {
            return true;
        }
    }
    return false;
}


/// Scan the object table for records matching any of the given sequence
/// ids; ids of the hits are added to obj_ids.
void CLDS_Query::FindSequences(const vector<string>& seqids,
                               CLDS_Set* obj_ids)
{
    CLDS_FindSeqIdFunctor search_func(m_db, seqids, obj_ids);
    BDB_iterate_file(m_db.object_db, search_func);
}


/// Scan the sequence id list table for records matching any of the given
/// sequence ids; object ids of the hits are added to obj_ids.
void CLDS_Query::FindSeqIdList(const vector<string>& seqids,
                               CLDS_Set* obj_ids)
{
    CLDS_FindSeqIdListFunctor search_func(seqids, obj_ids);
    BDB_iterate_file(m_db.seq_id_list, search_func);
}


/// Run a query string against the object table; row ids of the found
/// records are added to obj_ids.
///
/// A query string that fails to parse (CBDB_LibException) is silently
/// ignored and leaves obj_ids untouched.
void CLDS_Query::FindSequences(const string& query_str, CLDS_Set* obj_ids)
{
    _ASSERT(obj_ids);
    CLDS_IdTableScanner scanner(m_db.object_db, obj_ids);
    CBDB_Query    query;
    try {
        BDB_ParseQuery(query_str.c_str(), &query);
    } catch (CBDB_LibException&) {
        return; // ignore errors
    }

    scanner.Scan(query);
}


/// Build the description of an object or annotation by its id.
///
/// The object table is consulted first, then the annotation table.
///
/// @param type_map
///   Map of type names to type ids, used to fill in type_str.
/// @param id
///   Object or annotation id.
/// @param trace_to_top
///   When true and the record refers to a non-zero top level entry,
///   the description of that top level entry is returned instead.
/// @return
///   Filled in description; the id field is 0 if neither table has a
///   record with this id. The title field is assigned only for objects.
///
/// Raises eRecordNotFound (through LDS_THROW) when the file record
/// referenced by the object/annotation cannot be fetched.
CLDS_Query::SObjectDescr
CLDS_Query::GetObjectDescr(const map<string, int>& type_map,
                           int id,
                           bool trace_to_top)
{
    SObjectDescr descr;

    // Check objects
    //
    m_db.object_db.object_id = id;
    if (m_db.object_db.Fetch() == eBDB_Ok) {
        int tse_id = m_db.object_db.TSE_object_id;

        if (tse_id && trace_to_top) {
            // If non-top level entry, call recursively redirected to
            // the top level object
            return GetObjectDescr(type_map, tse_id, trace_to_top);
        }

        descr.id = id;
        descr.is_object = true;

        int object_type = m_db.object_db.object_type;
        descr.type_str = LDS_TypeMapSearch(type_map, object_type);

        int file_id = m_db.object_db.file_id;
        m_db.file_db.file_id = file_id;
        if (m_db.file_db.Fetch() != eBDB_Ok) {
            LDS_THROW(eRecordNotFound, "File record not found.");
        }

        descr.format = (CFormatGuess::EFormat)(int)m_db.file_db.format;
        descr.file_name = m_db.file_db.file_name;
        descr.offset = m_db.object_db.file_offset;
        descr.title = m_db.object_db.object_title;
/*
        m_db.object_attr_db.object_attr_id = id;
        if (m_db.object_attr_db.Fetch() == eBDB_Ok) {
            descr.title = m_db.object_attr_db.object_title;
        }
*/
        return descr;
    }

    // Check annotations
    //
    m_db.annot_db.annot_id = id;
    if (m_db.annot_db.Fetch() == eBDB_Ok) {
        int top_level_id = m_db.annot_db.top_level_id;

        if (top_level_id && trace_to_top) {
            // If non-top level entry, call recursively redirected to
            // the top level object
            return GetObjectDescr(type_map, top_level_id, trace_to_top);
        }

        descr.id = id;
        descr.is_object = false;

        int object_type = m_db.annot_db.annot_type;
        descr.type_str = LDS_TypeMapSearch(type_map, object_type);

        int file_id = m_db.annot_db.file_id;
        m_db.file_db.file_id = file_id;
        if (m_db.file_db.Fetch() != eBDB_Ok) {
            LDS_THROW(eRecordNotFound, "File record not found.");
        }

        descr.format = (CFormatGuess::EFormat)(int)m_db.file_db.format;
        descr.file_name = m_db.file_db.file_name;
        descr.offset = m_db.annot_db.file_offset;

        return descr;
    }

    descr.id = 0; // not found
    return descr;
}


END_SCOPE(objects)
END_NCBI_SCOPE

/*
 * ===========================================================================
 * $Log: lds_query.cpp,v $
 * Revision 1000.2  2004/06/01 19:45:50  gouriano
 * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.12
 *
 * Revision 1.12  2004/05/21 21:42:55  gorelenk
 * Added PCH ncbi_pch.hpp
 *
 * Revision 1.11  2004/03/11 18:43:41  kuznets
 * + FindSequences
 *
 * Revision 1.10  2004/03/09 17:16:59  kuznets
 * Merge object attributes with objects
 *
 * Revision 1.9  2003/08/06 20:49:13  kuznets
 * SObjectDescr::title handled in CLDS_Query::GetObjectDescr
 *
 * Revision 1.8  2003/07/14 19:48:04  kuznets
 * Minor changes to improve debugging
 *
 * Revision 1.7  2003/07/10 20:09:53  kuznets
 * Implemented GetObjectDescr query. Searches both objects and annotations.
 *
 * Revision 1.6  2003/07/09 19:32:10  kuznets
 * Added query scanning sequence id list.
 *
 * Revision 1.5  2003/06/27 14:36:45  kuznets
 * Fixed compilation problem with GCC
 *
 * Revision 1.4  2003/06/24 18:32:39  kuznets
 * Code clean up. Improved sequence id comparison.
 *
 * Revision 1.3  2003/06/24 15:40:30  kuznets
 * Working on sequence id scan search. Improved recognition of local sequences.
 *
 * Revision 1.2  2003/06/20 19:56:41  kuznets
 * Implemented new function "FindSequences"
 *
 * Revision 1.1  2003/06/16 14:55:00  kuznets
 * lds splitted into "lds" and "lds_admin"
 *
 *
 * ===========================================================================
 */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?