seq_vector_ci.cpp
来自「ncbi源码」· C++ 代码 · 共 923 行 · 第 1/2 页
CPP
923 行
/*
 * ===========================================================================
 * PRODUCTION $Log: seq_vector_ci.cpp,v $
 * PRODUCTION Revision 1000.3  2004/06/01 19:24:26  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.34
 * PRODUCTION
 * ===========================================================================
 */
/*  $Id: seq_vector_ci.cpp,v 1000.3 2004/06/01 19:24:26 gouriano Exp $
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Author: Aleksey Grichenko, Eugene Vasilchenko
*
* File Description:
*   Seq-vector iterator
*
*/

#include <ncbi_pch.hpp>
#include <objmgr/seq_vector.hpp>
#include <objmgr/seq_vector_ci.hpp>
#include <objects/seq/NCBI8aa.hpp>
#include <objects/seq/NCBIpaa.hpp>
#include <objects/seq/NCBIstdaa.hpp>
#include <objects/seq/NCBIeaa.hpp>
#include <objects/seq/NCBIpna.hpp>
#include <objects/seq/NCBI8na.hpp>
#include <objects/seq/NCBI4na.hpp>
#include <objects/seq/NCBI2na.hpp>
#include <objects/seq/IUPACaa.hpp>
#include <objects/seq/IUPACna.hpp>
#include <algorithm>
#include <objmgr/impl/seq_vector_cvt.hpp>
#include <objmgr/objmgr_exception.hpp>
#include <util/random_gen.hpp>


BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)


// Internal class to put random bases in place of ambiguities

// Mask applied to a sequence position to select a column of random data.
const size_t kRandomizerPosMask = 0x3f;
// Number of random values stored per ambiguity code (mask + 1 == 64).
const size_t kRandomDataSize = kRandomizerPosMask + 1;


class CNcbi2naRandomizer : public CObject
{
public:
    // Build the substitution table, drawing random values from 'gen'.
    CNcbi2naRandomizer(CRandom& gen);
    ~CNcbi2naRandomizer(void);

    typedef char* TData;

    // Replace every value in [data, data + count) in place, see definition.
    void RandomizeData(TData data,    // cache to be randomized
                       size_t count,  // number of bases in the cache
                       TSeqPos pos);  // sequence pos of the cache

private:
    // Copying is prohibited (declared, never defined in this section).
    CNcbi2naRandomizer(const CNcbi2naRandomizer&);
    CNcbi2naRandomizer& operator=(const CNcbi2naRandomizer&);

    // One row per 4-bit input code (16 rows).
    // Element [0] of a row is a flag: zero for a single (unambiguous)
    // base, non-zero (the number of candidate bases) for an ambiguity.
    // For a single base only element [1] is filled; for an ambiguity
    // elements [1 .. kRandomDataSize] hold the random substitution values.
    typedef char TRandomData[kRandomDataSize + 1];
    typedef TRandomData TRandomTable[16];

    TRandomTable m_RandomTable;
};


// Fill m_RandomTable for all 16 input codes.
//
// For each code the set bits select the candidate bases (bit N -> base N);
// code 0 is treated as if all four bits were set.  A code with exactly one
// candidate gets a fixed replacement.  Any other code gets kRandomDataSize
// values drawn from 'gen' out of per-base quotas.
CNcbi2naRandomizer::CNcbi2naRandomizer(CRandom& gen)
{
    unsigned int bases[4]; // Count of each base in the random distribution
    for (int na4 = 0; na4 < 16; na4++) {
        int bit_count = 0;
        char set_bit = 0;
        for (int bit = 0; bit < 4; bit++) {
            // na4 == 0 is special case (gap) should be treated as 0xf
            if ( !na4  ||  (na4 & (1 << bit)) ) {
                bit_count++;
                bases[bit] = 1;
                set_bit = bit;
            }
            else {
                bases[bit] = 0;
            }
        }
        if (bit_count == 1) {
            // Single base: only elements [0] and [1] of the row are set,
            // RandomizeData() never reads the others for such a row.
            m_RandomTable[na4][0] = 0;
            m_RandomTable[na4][1] = set_bit;
            continue;
        }
        // Ambiguity: create random distribution with possible bases
        m_RandomTable[na4][0] = bit_count; // need any non-zero value
        // Turn the 0/1 markers into quotas.  Adding the remainder means the
        // quotas sum to at least kRandomDataSize, so the selection loop
        // below always finds a base.
        for (int bit = 0; bit < 4; bit++) {
            bases[bit] *= kRandomDataSize/bit_count +
                kRandomDataSize % bit_count;
        }
        for (int i = kRandomDataSize - 1; i >= 0; i--) {
            CRandom::TValue rnd = gen.GetRand(0, i);
            // Walk the quotas until 'rnd' falls into one of them.
            // NOTE(review): 'rnd > bases[base]' also accepts
            // rnd == bases[base], which slightly favours lower-numbered
            // bases; confirm whether '>=' was intended.
            for (int base = 0; base < 4; base++) {
                if (!bases[base]  ||  rnd > bases[base]) {
                    rnd -= bases[base];
                    continue;
                }
                m_RandomTable[na4][i + 1] = base;
                bases[base]--;
                break;
            }
        }
    }
}


CNcbi2naRandomizer::~CNcbi2naRandomizer(void)
{
    return;
}


// Replace each of the 'count' values starting at 'data' in place.
//
// Every input byte is used (through an unsigned char cast) as a row index
// into the 16-row table; no range check is made here, so values must be
// in 0..15.  For an ambiguity row the replacement is picked by the low
// bits of the sequence position, so the same position always yields the
// same value.  'pos' is the sequence position of the first value.
void CNcbi2naRandomizer::RandomizeData(TData data,
                                       size_t count,
                                       TSeqPos pos)
{
    for (TData stop = data + count; data < stop; ++data, ++pos) {
        if ( m_RandomTable[(unsigned char)(*data)][0] ) {
            // Ambiguity, use random value
            *data = m_RandomTable[(unsigned char)(*data)]
                [(pos & kRandomizerPosMask) + 1];
        }
        else {
            // Normal base
            *data = m_RandomTable[(unsigned char)(*data)][1];
        }
    }
}


// Size of each of the two buffers allocated by x_InitializeCache().
static const TSeqPos kCacheSize = 1024;


// Copy 'count' values stored one per byte from 'srcCont', starting at
// value index 'srcPos', to 'dst'.  A non-null 'table' and/or 'reverse'
// select the matching copy_8bit* helper.
// Throws CSeqVectorException(eOutOfRange) if srcPos + count overflows or
// reaches past the end of 'srcCont'.
template<class DstIter, class SrcCont>
inline
void copy_8bit_any(DstIter dst, size_t count,
                   const SrcCont& srcCont, size_t srcPos,
                   const char* table, bool reverse)
{
    size_t endPos = srcPos + count;
    if ( endPos < srcPos || endPos > srcCont.size() ) {
        NCBI_THROW(CSeqVectorException, eOutOfRange,
                   "reference out of range of Seq-inst data");
    }
    if ( table ) {
        if ( reverse ) {
            copy_8bit_table_reverse(dst, count, srcCont, srcPos, table);
        }
        else {
            copy_8bit_table(dst, count, srcCont, srcPos, table);
        }
    }
    else {
        if ( reverse ) {
            copy_8bit_reverse(dst, count, srcCont, srcPos);
        }
        else {
            copy_8bit(dst, count, srcCont, srcPos);
        }
    }
}


// Copy 'count' values packed two per byte from 'srcCont', starting at
// value index 'srcPos', to 'dst'.  A non-null 'table' and/or 'reverse'
// select the matching copy_4bit* helper.
// Throws CSeqVectorException(eOutOfRange) if srcPos + count overflows or
// the requested values are not fully contained in 'srcCont'.
template<class DstIter, class SrcCont>
inline
void copy_4bit_any(DstIter dst, size_t count,
                   const SrcCont& srcCont, size_t srcPos,
                   const char* table, bool reverse)
{
    size_t endPos = srcPos + count;
    // Bytes needed to hold values [0, endPos), rounded UP: a partially used
    // trailing byte must exist too.  Rounding down would accept a request
    // that reads one byte past the end of the container.
    size_t endByte = endPos / 2 + (endPos % 2 ? 1 : 0);
    if ( endPos < srcPos || endByte > srcCont.size() ) {
        NCBI_THROW(CSeqVectorException, eOutOfRange,
                   "reference out of range of Seq-inst data");
    }
    if ( table ) {
        if ( reverse ) {
            copy_4bit_table_reverse(dst, count, srcCont, srcPos, table);
        }
        else {
            copy_4bit_table(dst, count, srcCont, srcPos, table);
        }
    }
    else {
        if ( reverse ) {
            copy_4bit_reverse(dst, count, srcCont, srcPos);
        }
        else {
            copy_4bit(dst, count, srcCont, srcPos);
        }
    }
}


// Copy 'count' values packed four per byte from 'srcCont', starting at
// value index 'srcPos', to 'dst'.  A non-null 'table' and/or 'reverse'
// select the matching copy_2bit* helper.
// Throws CSeqVectorException(eOutOfRange) if srcPos + count overflows or
// the requested values are not fully contained in 'srcCont'.
template<class DstIter, class SrcCont>
inline
void copy_2bit_any(DstIter dst, size_t count,
                   const SrcCont& srcCont, size_t srcPos,
                   const char* table, bool reverse)
{
    size_t endPos = srcPos + count;
    // Bytes needed to hold values [0, endPos), rounded UP (see
    // copy_4bit_any): up to three trailing values share the last byte.
    size_t endByte = endPos / 4 + (endPos % 4 ? 1 : 0);
    if ( endPos < srcPos || endByte > srcCont.size() ) {
        NCBI_THROW(CSeqVectorException, eOutOfRange,
                   "reference out of range of Seq-inst data");
    }
    if ( table ) {
        if ( reverse ) {
            copy_2bit_table_reverse(dst, count, srcCont, srcPos, table);
        }
        else {
            copy_2bit_table(dst, count, srcCont, srcPos, table);
        }
    }
    else {
        if ( reverse ) {
            copy_2bit_reverse(dst, count, srcCont, srcPos);
        }
        else {
            copy_2bit(dst, count, srcCont, srcPos);
        }
    }
}


// CSeqVector_CI::

// Default constructor: unknown strand, coding not set, no buffers.
CSeqVector_CI::CSeqVector_CI(void)
    : m_Strand(eNa_strand_unknown),
      m_Coding(CSeq_data::e_not_set),
      m_Cache(0),
      m_CachePos(0),
      m_CacheData(0),
      m_CacheEnd(0),
      m_BackupPos(0),
      m_BackupData(0),
      m_BackupEnd(0)
{
}


CSeqVector_CI::~CSeqVector_CI(void)
{
    x_DestroyCache();
}


// Copy constructor: start from an empty state, then reuse operator=.
// Buffers allocated before a failure are released before rethrowing.
CSeqVector_CI::CSeqVector_CI(const CSeqVector_CI& sv_it)
    : m_Strand(eNa_strand_unknown),
      m_Coding(CSeq_data::e_not_set),
      m_Cache(0),
      m_CachePos(0),
      m_CacheData(0),
      m_CacheEnd(0),
      m_BackupPos(0),
      m_BackupData(0),
      m_BackupEnd(0),
      m_Randomizer(sv_it.m_Randomizer)
{
    try {
        *this = sv_it;
    }
    catch (...) {
        x_DestroyCache();
        throw;
    }
}


// Create an iterator over 'seq_vector' positioned at 'pos'.
// Takes the sequence map, scope, strand and coding from the vector.
// Buffers allocated before a failure are released before rethrowing.
CSeqVector_CI::CSeqVector_CI(const CSeqVector& seq_vector, TSeqPos pos)
    : m_SeqMap(seq_vector.m_SeqMap),
      m_Scope(seq_vector.m_Scope),
      m_Strand(seq_vector.m_Strand),
      m_Coding(seq_vector.m_Coding),
      m_Cache(0),
      m_CachePos(0),
      m_CacheData(0),
      m_CacheEnd(0),
      m_BackupPos(0),
      m_BackupData(0),
      m_BackupEnd(0)
{
    try {
        x_SetPos(pos);
    }
    catch (...) {
        x_DestroyCache();
        throw;
    }
}


// Attach the iterator to 'seq_vector'.  If it was attached to a sequence
// map before, the segment iterator, cache and backup are reset first.
// The cache position is set to the size of the vector.
void CSeqVector_CI::x_SetVector(CSeqVector& seq_vector)
{
    if ( m_SeqMap ) {
        // reset old values
        m_Seg = CSeqMap_CI();
        x_ResetCache();
        x_ResetBackup();
    }

    m_SeqMap = seq_vector.m_SeqMap;
    m_Scope = seq_vector.m_Scope;
    m_Strand = seq_vector.m_Strand;
    m_Coding = seq_vector.m_Coding;
    m_CachePos = seq_vector.size();
}


// Length reported by the sequence map for the current scope.
inline
TSeqPos CSeqVector_CI::x_GetSize(void) const
{
    return m_SeqMap->GetLength(m_Scope.GetScopeOrNull());
}


// Point m_Seg at the resolved segment found for 'pos'.
inline
void CSeqVector_CI::x_InitSeg(TSeqPos pos)
{
    m_Seg = m_SeqMap->FindResolved(pos, m_Scope.GetScopeOrNull(), m_Strand);
}


// Change the coding.  When it actually changes, the cache and backup are
// reset and the iterator is re-positioned at the same sequence position.
void CSeqVector_CI::SetCoding(TCoding coding)
{
    if ( m_Coding != coding ) {
        TSeqPos pos = GetPos();
        x_ResetCache();
        x_ResetBackup();
        m_Coding = coding;
        x_SetPos(pos);
    }
}


// Assignment: copy scope, map, strand, coding, segment iterator, cache
// position and randomizer, then duplicate the cached bytes.
// The backup buffer is only copied when the source has a non-empty cache.
CSeqVector_CI& CSeqVector_CI::operator=(const CSeqVector_CI& sv_it)
{
    if ( this == &sv_it ) {
        return *this;
    }

    m_Scope = sv_it.m_Scope;
    m_SeqMap = sv_it.m_SeqMap;
    m_Strand = sv_it.m_Strand;
    m_Coding = sv_it.GetCoding();
    m_Seg = sv_it.m_Seg;
    m_CachePos = sv_it.x_CachePos();
    m_Randomizer = sv_it.m_Randomizer;
    // copy cache if any
    size_t cache_size = sv_it.x_CacheSize();
    if ( cache_size ) {
        x_InitializeCache();
        m_CacheEnd = m_CacheData + cache_size;
        // Keep the same offset into the cache as the source iterator.
        m_Cache = m_CacheData + sv_it.x_CacheOffset();
        memcpy(m_CacheData, sv_it.m_CacheData, cache_size);

        // copy backup cache if any
        size_t backup_size = sv_it.x_BackupSize();
        if ( backup_size ) {
            m_BackupPos = sv_it.x_BackupPos();
            m_BackupEnd = m_BackupData + backup_size;
            memcpy(m_BackupData, sv_it.m_BackupData, backup_size);
        }
        else {
            x_ResetBackup();
        }
    }
    else {
        x_ResetCache();
        x_ResetBackup();
    }
    return *this;
}


// Release both buffers.  The current position is stored in m_CachePos
// first, before the cache pointers are cleared.
void CSeqVector_CI::x_DestroyCache(void)
{
    m_CachePos = GetPos();

    delete[] m_CacheData;
    m_Cache = m_CacheData = m_CacheEnd = 0;

    delete[] m_BackupData;
    m_BackupData = m_BackupEnd = 0;
}


// Allocate the cache and backup buffers (kCacheSize bytes each) if there
// is no cache yet, otherwise just reset the existing cache.
// If the second allocation throws, the first buffer is released and the
// exception is rethrown.
void CSeqVector_CI::x_InitializeCache(void)
{
    if ( !m_Cache ) {
        m_Cache = m_CacheEnd = m_CacheData = new char[kCacheSize];
        try {
            m_BackupEnd = m_BackupData = new char[kCacheSize];
        }
        catch (...) {
            x_DestroyCache();
            throw;
        }
    }
    else {
        x_ResetCache();
    }
}


// Make the cache hold 'size' bytes (size <= kCacheSize), allocating the
// buffers on first use, and move the cache pointer to its start.
inline
void CSeqVector_CI::x_ResizeCache(size_t size)
{
    _ASSERT(size <= kCacheSize);
    if ( !m_CacheData ) {
        x_InitializeCache();
    }
    m_CacheEnd = m_CacheData + size;
    m_Cache = m_CacheData;
}


// Fill the cache starting at 'pos' and going towards the end of the
// current segment, with at most kCacheSize values; the cache pointer is
// left at the first cached value.  'pos' must lie inside m_Seg.
void CSeqVector_CI::x_UpdateCacheUp(TSeqPos pos)
{
    _ASSERT(pos < x_GetSize());

    TSeqPos segEnd = m_Seg.GetEndPosition();
    _ASSERT(pos >= m_Seg.GetPosition() && pos < segEnd);

    TSeqPos cache_size = min(kCacheSize, segEnd - pos);
    x_FillCache(pos, cache_size);
    m_Cache = m_CacheData;
    _ASSERT(GetPos() == pos);
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?