seqdboidlist.cpp
来自「ncbi源码」· C++ 代码 · 共 359 行
CPP
359 行
/*
 * ===========================================================================
 * PRODUCTION $Log: seqdboidlist.cpp,v $
 * PRODUCTION Revision 1000.1 2004/06/01 19:46:51 gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.9
 * PRODUCTION
 * ===========================================================================
 */

/* $Id: seqdboidlist.cpp,v 1000.1 2004/06/01 19:46:51 gouriano Exp $
 * ===========================================================================
 *
 * PUBLIC DOMAIN NOTICE
 * National Center for Biotechnology Information
 *
 * This software/database is a "United States Government Work" under the
 * terms of the United States Copyright Act. It was written as part of
 * the author's official duties as a United States Government employee and
 * thus cannot be copyrighted. This software/database is freely available
 * to the public for use. The National Library of Medicine and the U.S.
 * Government have not placed any restriction on its use or reproduction.
 *
 * Although all reasonable efforts have been taken to ensure the accuracy
 * and reliability of the software and data, the NLM and the U.S.
 * Government do not and cannot warrant the performance or results that
 * may be obtained by using this software or data. The NLM and the U.S.
 * Government disclaim all warranties, express or implied, including
 * warranties of performance, merchantability or fitness for any particular
 * purpose.
 *
 * Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Author: Kevin Bealer
 *
 */

#include <ncbi_pch.hpp>
#include <corelib/ncbistr.hpp>
#include "seqdboidlist.hpp"
#include "seqdbfile.hpp"

BEGIN_NCBI_SCOPE

/// Build the OID bit list for a volume set.
///
/// The volume set must have a mask (checked with _ASSERT).  When the set
/// reports a "simple" mask, the single mask file it names is used directly;
/// otherwise a combined bit array is assembled from the per-volume masks.
///
/// @param volset    Volume set to query for mask information.
/// @param use_mmap  Forwarded to the CSeqDBRawFile objects that read masks.
CSeqDBOIDList::CSeqDBOIDList(CSeqDBVolSet & volset, bool use_mmap)
    : m_NumOIDs (0),
      m_Bits    (0),
      m_BitEnd  (0)
{
    _ASSERT( volset.HasMask() );

    if (volset.HasSimpleMask()) {
        x_Setup( volset.GetSimpleMask(), use_mmap );
    } else {
        x_Setup( volset, use_mmap );
    }
}

/// Destructor; nothing is released explicitly here.
CSeqDBOIDList::~CSeqDBOIDList()
{
}

/// Set up the bit list from one mask file.
///
/// The leading 32-bit value of the file is read (via ReadSwapped) into
/// m_NumOIDs; the bytes from offset sizeof(Int4) to the end of the file
/// become the bit array [m_Bits, m_BitEnd).
///
/// @param filename  Mask file to open.
/// @param use_mmap  Passed to the CSeqDBRawFile constructor.
void CSeqDBOIDList::x_Setup(const string & filename, bool use_mmap)
{
    m_RawFile.Reset( new CSeqDBRawFile(m_MemPool, use_mmap) );
    m_RawFile->Open(filename);
    m_RawFile->ReadSwapped(& m_NumOIDs);

    Uint4 file_length = (Uint4) m_RawFile->GetFileLength();

    m_Bits   = (unsigned char*) m_RawFile->GetRegion(sizeof(Int4), file_length);
    m_BitEnd = m_Bits + file_length - sizeof(Int4);
}

// The general rule I am following in these methods is to use byte
// computations except during actual looping.

/// Set up the bit list by combining the masks of every volume.
///
/// A zeroed array of ((num_oids + 31) / 32) * 4 bytes is allocated from
/// m_MemPool.  For each volume, GetMaskFiles() reports either that the
/// whole OID range is included (the range is set with x_SetBitRange) or a
/// list of mask files (each is OR-ed in with x_OrFileBits).  Afterwards
/// m_NumOIDs is lowered until the last counted OID has its bit set.
///
/// @param volset    Volume set; must have a mask that is not "simple".
/// @param use_mmap  Forwarded to x_OrFileBits.
void CSeqDBOIDList::x_Setup(CSeqDBVolSet & volset, bool use_mmap)
{
    _ASSERT(volset.HasMask() && (! volset.HasSimpleMask()));

    // First, get the memory space, clear it.

    // Pad memory space to word boundary.

    Uint4 num_oids    = volset.GetNumSeqs();
    Uint4 byte_length = ((num_oids + 31) / 32) * 4;

    m_Bits   = (TUC*) m_MemPool.Alloc(byte_length);
    m_BitEnd = m_Bits + byte_length;

    memset((void*) m_Bits, 0, byte_length);

    // Then get the list of filenames and offsets to overlay onto it.

    for(Uint4 i = 0; i < volset.GetNumVols(); i++) {
        bool         all_oids (false);
        list<string> mask_files;
        Uint4        oid_start(0);
        Uint4        oid_end  (0);

        volset.GetMaskFiles(i, all_oids, mask_files, oid_start, oid_end);

        if (all_oids) {
            x_SetBitRange(oid_start, oid_end);
        } else {
            // For each file, copy bits into array.

            for(list<string>::iterator mask_iter = mask_files.begin();
                mask_iter != mask_files.end();
                ++mask_iter) {

                x_OrFileBits(*mask_iter, oid_start, oid_end, use_mmap);
            }
        }
    }

    // Trim the count so that it ends just past the last set bit.

    m_NumOIDs = num_oids;

    while(m_NumOIDs && (! x_IsSet(m_NumOIDs - 1))) {
        -- m_NumOIDs;
    }

    if (seqdb_debug_class & debug_oid) {
        // Hex dump of the combined array, 32 bytes per output line.

        cout << "x_Setup: Dumping OID map data." << endl;

        unsigned cnt = 0;

        cout << hex;

        for(TCUC * bp = m_Bits; bp < m_BitEnd; bp ++) {
            unsigned int ubp = (unsigned int)(*bp);

            if (ubp >= 16) {
                cout << ubp << " ";
            } else {
                cout << "0" << ubp << " ";
            }

            cnt++;

            if (cnt == 32) {
                cout << "\n";
                cnt = 0;
            }
        }

        cout << dec << "\n" << endl;
    }
}

// oid_end is not used - it could be.  One use would be to trim the
// "incoming bits" to that length; specifically, to assume that the
// file may contain nonzero "junk data" after the official end point.
//
// This implies that two oid sets share the oid mask file, but one
// used a smaller subset of that file.  That really should never
// happen; it would be so unlikely for that optimization to "buy
// anything" that the code would almost certainly never be written
// that way.  For this reason, I have not yet implemented trimming.

/// OR the bits of one mask file into the combined array.
///
/// The file's leading 32-bit value gives the number of OIDs it covers; the
/// bit data that follows is merged into [m_Bits, m_BitEnd) starting at bit
/// offset oid_start.
///
/// The number of bytes merged is limited both by the bytes actually present
/// in the file after its header and by the room left in the destination
/// array.  The source length is rounded up to a multiple of 4 bytes while
/// the destination offset need not be, so without the destination limit a
/// volume that starts on a non-word boundary near the end of the array
/// would be read and written past m_BitEnd.
///
/// NOTE(review): the leading value is treated here as a count of OIDs;
/// confirm against the mask file format whether it is a count or the index
/// of the last OID.
///
/// @param mask_fname  Mask file to read.
/// @param oid_start   Bit offset in the combined array for the file's bit 0.
/// @param use_mmap    Passed to the CSeqDBRawFile constructor.
void CSeqDBOIDList::x_OrFileBits(const string & mask_fname,
                                 Uint4          oid_start,
                                 Uint4          /*oid_end*/,
                                 bool           use_mmap)
{
    // Open file and get pointers

    TCUC* bitmap    = 0;
    Uint4 src_bytes = 0;

    CSeqDBMemPool mempool;
    CSeqDBRawFile volmask(mempool, use_mmap);

    {
        Uint4 num_oids = 0;

        volmask.Open(mask_fname);
        volmask.ReadSwapped(& num_oids);

        Uint4 file_length = (Uint4) volmask.GetFileLength();

        // Cast forces signed/unsigned conversion.
        bitmap = (TCUC*) volmask.GetRegion(sizeof(Int4), file_length);

        // Length implied by the header, padded to a whole 32-bit word.
        src_bytes = ((num_oids + 31) / 32) * 4;

        // Never read more than the file holds after its header.
        Uint4 payload = 0;

        if (file_length > sizeof(Int4)) {
            payload = (Uint4)(file_length - sizeof(Int4));
        }

        if (src_bytes > payload) {
            src_bytes = payload;
        }
    }

    // Work out how much room the destination has from oid_start onward.

    Uint4 dst_total = (Uint4)(m_BitEnd - m_Bits);
    Uint4 dst_first = oid_start / 8;

    _ASSERT(dst_first <= dst_total);

    if ((src_bytes == 0) || (dst_first >= dst_total)) {
        return;
    }

    Uint4 dst_room = dst_total - dst_first;
    Uint4 count    = (src_bytes < dst_room) ? src_bytes : dst_room;

    TCUC * srcp = bitmap;
    TUC  * locp = m_Bits + dst_first;
    TUC  * endp = locp + count;

    // Fold the source bytes into m_Bits/m_BitEnd at bit offset oid_start.

    if (0 == (oid_start & 31)) {
        // If the new data is "word aligned", we can use a fast algorithm.
        // NOTE(review): assumes both buffers are suitably aligned for
        // Uint4 access - confirm for the allocator and file region.

        const Uint4 * wsrcp = (const Uint4*) srcp;
        Uint4       * wlocp = (Uint4*) locp;
        Uint4       * wendp = wlocp + (count / 4);

        while(wlocp < wendp) {
            *wlocp++ |= *wsrcp++;
        }

        // Any bytes left over after the whole words.

        srcp = (TCUC*) wsrcp;
        locp = (TUC*)  wlocp;

        while(locp < endp) {
            *locp++ |= *srcp++;
        }
    } else if (0 == (oid_start & 7)) {
        // If the new data is "byte aligned", we can use a less fast algorithm.

        while(locp < endp) {
            *locp++ |= *srcp++;
        }
    } else {
        // Otherwise... we have to use a slower, byte splicing algorithm.

        Uint4 Rshift = oid_start & 7;
        Uint4 Lshift = 8 - Rshift;

        // This loop iterates over the source bytes.  Each byte is
        // split over two destination bytes.  Every source byte in
        // range is processed, including the last one; its spill-over
        // half is stored only if the following destination byte
        // exists (bits that would land past m_BitEnd are presumably
        // padding).

        while(locp < endp) {
            // Store left half of source char in one location.

            TCUC source = *srcp;
            srcp++;

            *locp |= (TUC)(source >> Rshift);
            locp++;

            // Store right half of source in the next location.

            if (locp < m_BitEnd) {
                *locp |= (TUC)(source << Lshift);
            }
        }
    }
}

/// Set every bit in the half-open OID range [oid_start, oid_end).
///
/// Bits are set one at a time from both ends until the remaining range
/// starts and ends on a byte boundary; the whole bytes in between are then
/// filled with 0xFF.
///
/// @param oid_start  First OID to set.
/// @param oid_end    One past the last OID to set.
void CSeqDBOIDList::x_SetBitRange(Uint4 oid_start,
                                  Uint4 oid_end)
{
    // Set bits at the front and back, closing to a range of full-byte
    // addresses.

    while((oid_start & 0x7) && (oid_start < oid_end)) {
        x_SetBit(oid_start);
        ++oid_start;
    }

    while((oid_end & 0x7) && (oid_start < oid_end)) {
        x_SetBit(oid_end - 1);
        --oid_end;
    }

    if (oid_start < oid_end) {
        TUC * bp_start = m_Bits + (oid_start >> 3);
        TUC * bp_end   = m_Bits + (oid_end   >> 3);

        _ASSERT(bp_end   <= m_BitEnd);
        _ASSERT(bp_start <  bp_end);

        memset(bp_start, 0xFF, (bp_end - bp_start));
    }
}

/// Set the bit for one OID.
///
/// Within each byte the lowest OID uses the most significant bit
/// (mask 0x80 >> (oid & 7)).  Nothing is written if the OID's byte lies at
/// or beyond m_BitEnd.
///
/// @param oid  OID whose bit should be set.
void CSeqDBOIDList::x_SetBit(TOID oid)
{
    TUC * bp = m_Bits + (oid >> 3);

    Int4 bitnum = (oid & 7);

    if (bp < m_BitEnd) {
        *bp |= (0x80 >> bitnum);
    }
}

/// Test the bit for one OID.
///
/// @param oid  OID to test.
/// @return     true if the OID's byte lies before m_BitEnd and its bit is
///             set; false otherwise.
bool CSeqDBOIDList::x_IsSet(TOID oid) const
{
    TCUC * bp = m_Bits + (oid >> 3);

    Int4 bitnum = (oid & 7);

    if (bp < m_BitEnd) {
        if (*bp & (0x80 >> bitnum)) {
            return true;
        }
    }

    return false;
}

/// Advance oid to the next OID whose bit is set.
///
/// If the bit for the incoming oid is already set, oid is left unchanged.
/// Otherwise oid is advanced; whenever it reaches a multiple of 32, runs of
/// all-zero 32-bit words are skipped in one step.  The word scan covers
/// only whole words that lie below m_NumOIDs and inside
/// [m_Bits, m_BitEnd), so it cannot read past the array even when
/// m_NumOIDs overstates the data.
///
/// @param oid  In: OID to start from.  Out: the OID found, when true is
///             returned.
/// @return     true if a set bit was found; false once m_NumOIDs is
///             reached without finding one.
bool CSeqDBOIDList::x_FindNext(TOID & oid) const
{
    // If the specified OID is valid, use it.

    if (x_IsSet(oid)) {
        return true;
    }

    // OPTIONAL portion

    // Number of whole words covered by m_NumOIDs, limited to the
    // whole words actually present in the array.

    Uint4 whole_words = m_NumOIDs >> 5;
    Uint4 avail_words = (Uint4)((m_BitEnd - m_Bits) / 4);

    if (whole_words > avail_words) {
        whole_words = avail_words;
    }

    Uint4 whole_word_oids = whole_words << 5;

    while(oid < whole_word_oids) {
        if (x_IsSet(oid)) {
            return true;
        }

        oid ++;

        if ((oid & 31) == 0) {
            // On a word boundary: skip words that are entirely zero.
            // NOTE(review): assumes m_Bits is suitably aligned for
            // Uint4 access - confirm.

            const Uint4 * bp = ((const Uint4*) m_Bits + (oid             >> 5));
            const Uint4 * ep = ((const Uint4*) m_Bits + (whole_word_oids >> 5));

            while((bp < ep) && (0 == *bp)) {
                ++ bp;
                oid += 32;
            }
        }
    }

    // END of OPTIONAL portion

    while(oid < m_NumOIDs) {
        if (x_IsSet(oid)) {
            return true;
        }

        oid++;
    }

    return false;
}

END_NCBI_SCOPE
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?