seqdbfile.hpp

来自「ncbi源码」· HPP 代码 · 共 423 行

HPP
423
字号
/* * =========================================================================== * PRODUCTION $Log: seqdbfile.hpp,v $ * PRODUCTION Revision 1000.0  2004/04/15 15:05:47  gouriano * PRODUCTION PRODUCTION: IMPORTED [CATCHUP_003] Dev-tree R1.11 * PRODUCTION * =========================================================================== */#ifndef OBJTOOLS_READERS_SEQDB__SEQDBFILE_HPP#define OBJTOOLS_READERS_SEQDB__SEQDBFILE_HPP/*  $Id: seqdbfile.hpp,v 1000.0 2004/04/15 15:05:47 gouriano Exp $ * =========================================================================== * *                            PUBLIC DOMAIN NOTICE *               National Center for Biotechnology Information * *  This software/database is a "United States Government Work" under the *  terms of the United States Copyright Act.  It was written as part of *  the author's official duties as a United States Government employee and *  thus cannot be copyrighted.  This software/database is freely available *  to the public for use. The National Library of Medicine and the U.S. *  Government have not placed any restriction on its use or reproduction. * *  Although all reasonable efforts have been taken to ensure the accuracy *  and reliability of the software and data, the NLM and the U.S. *  Government do not and cannot warrant the performance or results that *  may be obtained by using this software or data. The NLM and the U.S. *  Government disclaim all warranties, express or implied, including *  warranties of performance, merchantability or fitness for any particular *  purpose. * *  Please cite the author in any work or product based on this material. * * =========================================================================== * * Author:  Kevin Bealer * *//// File access objects for CSeqDB////// These objects define access to the various database component/// files, such as name.pin, name.phr, name.psq, and so on.#include "seqdbmempool.hpp"#include "seqdbgeneral.hpp"#include <corelib/ncbistr.hpp>#include <corelib/ncbifile.hpp>#include <corelib/ncbi_bswap.hpp>#include <corelib/ncbiobj.hpp>#include <objtools/readers/seqdb/seqdbcommon.hpp>#include <set>BEGIN_NCBI_SCOPE/// Raw file.////// This is the lowest level; it controls basic (byte data) access to/// the file, isolating higher levels from differences in handling/// mmapped vs opened files.class CSeqDBRawFile : public CObject {public:    CSeqDBRawFile(CSeqDBMemPool & mempool, bool use_mmap)        : m_Length (0),          m_Mapped (0),          m_MemPool(mempool),          m_Offset (0),          m_Opened (false),          m_UseMMap(use_mmap)    {    }        ~CSeqDBRawFile()    {        Clear();    }        /// MMap or Open a file.    bool Open(const string & name);        void Clear(void);        bool Valid(void) const    {        return (m_Mapped || m_Opened) ? true : false;    }        const char * GetRegion(Uint4 start, Uint4 end) const;        void SetFileOffset(Uint4 offset)    {        m_Offset = offset;    }        Uint4 GetFileOffset(void) const    {        return m_Offset;    }        Uint8 GetFileLength(void) const    {        return m_Length;    }        Uint8 Swap8(const char * input) const;        void ReadSwapped(Uint4 * z);    void ReadSwapped(Uint8 * z);    void ReadSwapped(string * z);        // Does not modify (or use) internal file offset    bool ReadBytes(char * z, Uint4 start, Uint4 end) const;    private:    bool x_ValidGet(Uint4 start, Uint4 end, Uint4 filesize) const    {        return (start <= end) && (end <= filesize);    }        bool x_ReadFileRegion(char * region, Uint4 start, Uint4 end) const;        void x_SetLength(bool);            // Data        mutable CFastMutex    m_FileLock;    Uint8                 m_Length;    CMemoryFile         * m_Mapped;    CSeqDBMemPool       & m_MemPool;    string                m_Name;    Uint4                 m_Offset;    bool                  m_Opened;    mutable CNcbiIfstream m_Stream;    bool                  m_UseMMap;};inline Uint8 CSeqDBRawFile::Swap8(const char * input) const{    unsigned char * bytes = (unsigned char *) input;            Uint8 value = bytes[7];    for(Int4 i = 7; i >= 0; i--) {        value += bytes[i];        if(i) value <<= 8;    }            value = 0;    for(Int4 i = 7; i >= 0; i--) {        value += bytes[i];        if(i) value <<= 8;    }            return value;}/// Database component file////// This represents any database component file with an extension like/// "pxx" or "nxx".  This finds the correct type (protein or/// nucleotide) if that is unknown, and computes the filename based on/// a filename template like "path/to/file/basename.-in".////// This also provides a 'protected' interface to the specific db/// files, and defines a few useful methods.class CSeqDBExtFile : public CObject {public:    CSeqDBExtFile(CSeqDBMemPool & mempool,                  const string  & dbfilename,                  char            prot_nucl,                  bool            use_mmap);        virtual ~CSeqDBExtFile()    {    }    protected:    const char * x_GetRegion(Uint4 start, Uint4 end) const    {        return m_File.GetRegion(start, end);    }        Uint4 x_GetFileOffset(void) const    {        return m_File.GetFileOffset();    }        void x_SetFileOffset(Uint4 offset)    {        m_File.SetFileOffset(offset);    }        void x_ReadBytes(char * x, Uint4 start, Uint4 end) const    {        m_File.ReadBytes(x, start, end);    }        template<class T>    void x_ReadSwapped(T * x)    {        m_File.ReadSwapped(x);    }        char x_GetSeqType(void) const    {        return m_ProtNucl;    }    private:    void x_SetFileType(char prot_nucl);        // Data        string        m_FileName;    char          m_ProtNucl;    CSeqDBRawFile m_File;};void inline CSeqDBExtFile::x_SetFileType(char prot_nucl){    m_ProtNucl = prot_nucl;        if ((m_ProtNucl != kSeqTypeProt) &&        (m_ProtNucl != kSeqTypeNucl) &&        (m_ProtNucl != kSeqTypeUnkn)) {                NCBI_THROW(CSeqDBException, eArgErr,                   "Invalid argument: seq type must be 'p' or 'n'.");    }        _ASSERT(m_FileName.size() >= 5);        m_FileName[m_FileName.size() - 3] = m_ProtNucl;}/// Index files////// This is the .pin or .nin file; it provides indices into the other/// files.  The version, title, date, and other summary information is/// also stored here.class CSeqDBIdxFile : public CSeqDBExtFile {public:    CSeqDBIdxFile(CSeqDBMemPool & mempool,                  const string  & dbname,                  char            prot_nucl,                  bool            use_mmap);        virtual ~CSeqDBIdxFile()    {    }        bool GetSeqStartEnd(Uint4 oid, Uint4 & start, Uint4 & end) const;        bool GetHdrStartEnd(Uint4 oid, Uint4 & start, Uint4 & end) const;        bool GetAmbStartEnd(Uint4 oid, Uint4 & start, Uint4 & end) const;        char GetSeqType(void) const    {        return x_GetSeqType();    }        string GetTitle(void) const    {        return m_Title;    }        string GetDate(void) const    {        return m_Date;    }        Uint4 GetNumSeqs(void) const    {        return m_NumSeqs;    }        Uint8 GetTotalLength(void) const    {        return m_TotLen;    }        Uint4 GetMaxLength(void) const    {        return m_MaxLen;    }    private:    Uint4 x_GetSeqOffset(int oid) const    {	return SeqDB_GetStdOrd( (const Uint4 *)(m_SeqHandle + (oid * 4)) );    }        Uint4 x_GetAmbCharOffset(int oid) const    {        return SeqDB_GetStdOrd( (const Uint4 *)(m_AmbCharHandle + (oid * 4)) );    }        Uint4 x_GetHdrOffset(int oid) const    {        return SeqDB_GetStdOrd( (const Uint4 *)(m_HdrHandle + (oid * 4)) );    }        // Swapped data from .[pn]in file        string m_Title;   // L3083,3087    string m_Date;    // L3094,3097    Uint4  m_NumSeqs; // L3101    Uint8  m_TotLen;  // L3110    Uint4  m_MaxLen;  // L3126        // Other pointers and indices        const char * m_HdrHandle;    const char * m_SeqHandle;    const char * m_AmbCharHandle;};bool inlineCSeqDBIdxFile::GetSeqStartEnd(Uint4 oid, Uint4 & start, Uint4 & end) const{    start = x_GetSeqOffset(oid);            if (kSeqTypeProt == x_GetSeqType()) {        end = x_GetSeqOffset(oid + 1);    } else {        end = x_GetAmbCharOffset(oid);    }            return true;}    bool inlineCSeqDBIdxFile::GetHdrStartEnd(Uint4 oid, Uint4 & start, Uint4 & end) const{    start = x_GetHdrOffset(oid);    end   = x_GetHdrOffset(oid + 1);            return true;}    bool inlineCSeqDBIdxFile::GetAmbStartEnd(Uint4 oid, Uint4 & start, Uint4 & end) const{    if (kSeqTypeNucl == x_GetSeqType()) {        start = x_GetAmbCharOffset(oid);        end   = x_GetSeqOffset(oid + 1);                if (start <= end) {            return true;        }    }        return false;}class CSeqDBSeqFile : public CSeqDBExtFile {public:    CSeqDBSeqFile(CSeqDBMemPool & mempool,                  const string  & dbname,                  char            prot_nucl,                  bool            use_mmap)        : CSeqDBExtFile(mempool, dbname + ".-sq", prot_nucl, use_mmap)    {    }        virtual ~CSeqDBSeqFile()    {    }        void ReadBytes(char * x, Uint4 start, Uint4 end) const    {        x_ReadBytes(x, start, end);    }        const char * GetRegion(Uint4 start, Uint4 end) const    {        return x_GetRegion(start, end);    }};class CSeqDBHdrFile : public CSeqDBExtFile {public:    CSeqDBHdrFile(CSeqDBMemPool & mempool,                  const string  & dbname,                  char            prot_nucl,                  bool            use_mmap)        : CSeqDBExtFile(mempool, dbname + ".-hr", prot_nucl, use_mmap)    {    }        virtual ~CSeqDBHdrFile()    {    }        void ReadBytes(char * x, Uint4 start, Uint4 end) const    {        x_ReadBytes(x, start, end);    }        const char * GetRegion(Uint4 start, Uint4 end) const    {        return x_GetRegion(start, end);    }};END_NCBI_SCOPE#endif // OBJTOOLS_READERS_SEQDB__SEQDBFILE_HPP

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?