seqdbalias.cpp

来自「ncbi源码」· C++ 代码 · 共 589 行 · 第 1/2 页

CPP
589
字号
/* * =========================================================================== * PRODUCTION $Log: seqdbalias.cpp,v $ * PRODUCTION Revision 1000.1  2004/06/01 19:46:33  gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.13 * PRODUCTION * =========================================================================== *//*  $Id: seqdbalias.cpp,v 1000.1 2004/06/01 19:46:33 gouriano Exp $ * =========================================================================== * *                            PUBLIC DOMAIN NOTICE *               National Center for Biotechnology Information * *  This software/database is a "United States Government Work" under the *  terms of the United States Copyright Act.  It was written as part of *  the author's official duties as a United States Government employee and *  thus cannot be copyrighted.  This software/database is freely available *  to the public for use. The National Library of Medicine and the U.S. *  Government have not placed any restriction on its use or reproduction. * *  Although all reasonable efforts have been taken to ensure the accuracy *  and reliability of the software and data, the NLM and the U.S. *  Government do not and cannot warrant the performance or results that *  may be obtained by using this software or data. The NLM and the U.S. *  Government disclaim all warranties, express or implied, including *  warranties of performance, merchantability or fitness for any particular *  purpose. * *  Please cite the author in any work or product based on this material. * * =========================================================================== * * Author:  Kevin Bealer * */#include <ncbi_pch.hpp>#include <corelib/ncbistr.hpp>#include <corelib/ncbifile.hpp>#include <algorithm>#include "seqdbalias.hpp"#include "seqdbfile.hpp"BEGIN_NCBI_SCOPE/// Index file.////// Index files (extension nin or pin) contain information on where to/// find information in other files.  The OID is the (implied) key.// Public Constructor//// This is the user-visible constructor, which builds the top level// node in the dbalias node tree.  This design effectively treats the// user-input database list as if it were an alias file containing// only the DBLIST specification.CSeqDBAliasNode::CSeqDBAliasNode(const string & dbname_list,                                 char           prot_nucl,                                 bool           use_mmap){    string new_names(dbname_list);    x_ResolveNames(new_names, m_DBPath, prot_nucl);        set<string> recurse;        if (seqdb_debug_class & debug_alias) {        cout << "user list((" << dbname_list << "))<>";    }        m_Values["DBLIST"] = new_names;        x_ExpandAliases("-", prot_nucl, use_mmap, recurse);}// Private Constructor// // This is the constructor for nodes other than the top-level node.// As such it is private and only called from this class.// // This constructor constructs subnodes by calling x_ExpandAliases,// which calls this constructor again with the subnode's arguments.// But no node should be its own ancestor.  To prevent this kind of// recursive loop, each file adds its full path to a set of strings// and does not create a subnode for any path already in that set.// // The set (recurse) is passed BY VALUE so that two branches of the// same file can contain equivalent nodes.  A more efficient method// for allowing this kind of sharing might be to pass by reference,// removing the current node path from the set after construction.CSeqDBAliasNode::CSeqDBAliasNode(const string & dbpath,                                 const string & dbname,                                 char           prot_nucl,                                 bool           use_mmap,                                 set<string>    recurse)    : m_DBPath(dbpath){    if (seqdb_debug_class & debug_alias) {        bool comma = false;                cout << dbname << "<";        for(set<string>::iterator i = recurse.begin(); i != recurse.end(); i++) {            if (comma) {                cout << ",";            }            comma = true;            cout << SeqDB_GetFileName(*i);        }        cout << ">";    }        string full_filename( x_MkPath(m_DBPath, dbname, prot_nucl) );    recurse.insert(full_filename);        x_ReadValues(full_filename, use_mmap);    x_ExpandAliases(dbname, prot_nucl, use_mmap, recurse);}// This takes the names in dbname_list, finds the path for each name,// and recreates a space delimited version.  This is only done during// topmost node construction; names supplied by the end user get this// treatment, lower level nodes still need absolute or relative paths// to specify the database locations.// // After each name is resolved, the largest prefix is found and moved// to the m_DBPath variable.// // [I'm not sure if this is really worth while; it seemed like it// would be and it wasn't too bad to write.  It could probably be// omitted in the cliff notes version. -kmb]void CSeqDBAliasNode::x_ResolveNames(string & dbname_list,                                     string & dbname_path,                                     char     prot_nucl){    dbname_path = ".";        vector<string> namevec;    NStr::Tokenize(dbname_list, " ", namevec, NStr::eMergeDelims);        Uint4 i = 0;        for(i = 0; i < namevec.size(); i++) {        namevec[i] =            SeqDB_FindBlastDBPath(namevec[i], prot_nucl);                if (namevec[i].empty()) {            NCBI_THROW(CSeqDBException,                       eFileErr,                       "No alias or index file found.");        }    }        Uint4 common = namevec[0].size();        // Reduce common length to length of min db path.        for(i = 1; common && (i < namevec.size()); i++) {        if (namevec[i].size() < common) {            common = namevec.size();        }    }        if (common) {        --common;    }        // Reduce common length to largest universal prefix.        string & first = namevec[0];        for(i = 1; common && (i < namevec.size()); i++) {        // Reduce common prefix length until match is found.                while(string(first, 0, common) != string(namevec[i], 0, common)) {            --common;        }    }        // Adjust back to whole path component.        while(common && (first[common] != CFile::GetPathSeparator())) {        --common;    }        if (common) {        // Factor out common path components.                dbname_path.assign(first, 0, common);                for(i = 0; i < namevec.size(); i++) {            namevec[i].erase(0, common+1);        }    }        dbname_list = namevec[0];        for(i = 1; i < namevec.size(); i++) {        dbname_list += ' ';        dbname_list += namevec[i];    }}void CSeqDBAliasNode::x_ReadLine(const char * bp,                                 const char * ep){    const char * p = bp;        // If first nonspace char is '#', line is a comment, so skip.    if (*p == '#') {        return;    }        // Find name    const char * spacep = p;        while((spacep < ep) && (*spacep != ' '))        spacep ++;        string name(p, spacep);        // Find value    while((spacep < ep) && ((*spacep == ' ') || (*spacep == '\t')))        spacep ++;        string value(spacep, ep);        // Store in this nodes' dictionary.    m_Values[name] = value;}void CSeqDBAliasNode::x_ReadValues(const string  & fn,                                   bool            use_mmap){    CSeqDBMemPool mempool;    CSeqDBRawFile af(mempool, use_mmap);    af.Open(fn);        Uint4 file_length = (Uint4) af.GetFileLength();        const char * bp = af.GetRegion(0, file_length);    const char * ep = bp + file_length;    const char * p  = bp;        while(p < ep) {        // Skip spaces        while((p < ep) && (*p == ' ')) {            p++;        }                const char * eolp = p;                while((eolp < ep) && (*eolp != '\n')) {            eolp++;        }                // Non-empty line, so read it.        if (eolp != p) {            x_ReadLine(p, eolp);        }                p = eolp + 1;    }}void CSeqDBAliasNode::x_ExpandAliases(const string & this_name,                                      char           prot_nucl,                                      bool           use_mmap,                                      set<string>  & recurse){    vector<string> namevec;    string dblist( m_Values["DBLIST"] );    NStr::Tokenize(dblist, " ", namevec, NStr::eMergeDelims);        bool parens = false;        for(Uint4 i = 0; i<namevec.size(); i++) {        if (namevec[i] == SeqDB_GetBaseName(this_name)) {            // If the base name of the alias file is also listed in            // "dblist", it is assumed to refer to a volume instead of            // to itself.                        m_VolNames.push_back(this_name);            continue;        }                string new_db_loc( x_MkPath(m_DBPath, namevec[i], prot_nucl) );                if (recurse.find(new_db_loc) != recurse.end()) {            NCBI_THROW(CSeqDBException,

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?