seq_vector_ci.cpp

来自「ncbi源码」· C++ 代码 · 共 923 行 · 第 1/2 页

CPP
923
字号
/*
 * ===========================================================================
 * PRODUCTION $Log: seq_vector_ci.cpp,v $
 * PRODUCTION Revision 1000.3  2004/06/01 19:24:26  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.34
 * PRODUCTION
 * ===========================================================================
 */

/*  $Id: seq_vector_ci.cpp,v 1000.3 2004/06/01 19:24:26 gouriano Exp $
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Author: Aleksey Grichenko, Eugene Vasilchenko
*
* File Description:
*   Seq-vector iterator
*
*/

#include <ncbi_pch.hpp>
#include <objmgr/seq_vector.hpp>
#include <objmgr/seq_vector_ci.hpp>
#include <objects/seq/NCBI8aa.hpp>
#include <objects/seq/NCBIpaa.hpp>
#include <objects/seq/NCBIstdaa.hpp>
#include <objects/seq/NCBIeaa.hpp>
#include <objects/seq/NCBIpna.hpp>
#include <objects/seq/NCBI8na.hpp>
#include <objects/seq/NCBI4na.hpp>
#include <objects/seq/NCBI2na.hpp>
#include <objects/seq/IUPACaa.hpp>
#include <objects/seq/IUPACna.hpp>
#include <algorithm>
#include <objmgr/impl/seq_vector_cvt.hpp>
#include <objmgr/objmgr_exception.hpp>
#include <util/random_gen.hpp>


BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)


// Internal class to put random bases in place of ambiguities

// Mask applied to a sequence position to pick a column of random data.
const size_t kRandomizerPosMask = 0x3f;
// Number of random values stored per ambiguity code (mask + 1 == 64).
const size_t kRandomDataSize    = kRandomizerPosMask + 1;


// Lookup table that maps each of the 16 possible 4-bit codes either to a
// single fixed value (codes with exactly one bit set) or to one of
// kRandomDataSize pre-generated values selected by sequence position.
class CNcbi2naRandomizer : public CObject
{
public:
    // Fill the lookup table; every random number used comes from 'gen'.
    CNcbi2naRandomizer(CRandom& gen);
    ~CNcbi2naRandomizer(void);

    typedef char* TData;

    // Replace each of 'count' values in 'data' in place via the table.
    void RandomizeData(TData data,    // cache to be randomized
                       size_t count,  // number of bases in the cache
                       TSeqPos pos);  // sequence pos of the cache

private:
    // Copy operations are declared private; no definition is visible in
    // this part of the file.
    CNcbi2naRandomizer(const CNcbi2naRandomizer&);
    CNcbi2naRandomizer& operator=(const CNcbi2naRandomizer&);

    // First value in each row indicates ambiguity (non-zero) or
    // normal base (0).  For a normal base the result is stored in [1];
    // for an ambiguity [1 .. kRandomDataSize] hold the random values.
    typedef char        TRandomData[kRandomDataSize + 1];
    typedef TRandomData TRandomTable[16];

    TRandomTable m_RandomTable;
};


// Build m_RandomTable for all 16 codes.
//
// For a code with a single bit set the row is {0, bit index}.  For any other
// code (including 0, which is handled as if all four bits were set) the row
// is {bit_count, 64 values drawn among the indices of the set bits}.
CNcbi2naRandomizer::CNcbi2naRandomizer(CRandom& gen)
{
    unsigned int bases[4]; // Count of each base in the random distribution
    for (int na4 = 0; na4 < 16; na4++) {
        int bit_count = 0;
        char set_bit = 0;
        for (int bit = 0; bit < 4; bit++) {
            // na4 == 0 is special case (gap) should be treated as 0xf
            if ( !na4  ||  (na4 & (1 << bit)) ) {
                bit_count++;
                bases[bit] = 1;
                set_bit = bit;
            }
            else {
                bases[bit] = 0;
            }
        }
        if (bit_count == 1) {
            // Single base
            m_RandomTable[na4][0] = 0;
            m_RandomTable[na4][1] = set_bit;
            continue;
        }
        // Ambiguity: create random distribution with possible bases
        m_RandomTable[na4][0] = bit_count; // need any non-zero value
        // Give every allowed base a quota of slots; disallowed bases keep 0.
        // NOTE(review): for bit_count == 3 the quota is 64/3 + 64%3 == 22 per
        // base, i.e. 66 in total for 64 slots, so quotas are not all used up.
        for (int bit = 0; bit < 4; bit++) {
            bases[bit] *= kRandomDataSize/bit_count +
                kRandomDataSize % bit_count;
        }
        // Fill slots from the last to the first, consuming quotas as we go.
        for (int i = kRandomDataSize - 1; i >= 0; i--) {
            CRandom::TValue rnd = gen.GetRand(0, i);
            for (int base = 0; base < 4; base++) {
                // NOTE(review): the test is '>' rather than '>=', so a base
                // is still chosen when rnd equals its remaining quota.  Left
                // as is because changing it would alter the generated table.
                if (!bases[base]  ||  rnd > bases[base]) {
                    rnd -= bases[base];
                    continue;
                }
                m_RandomTable[na4][i + 1] = base;
                bases[base]--;
                break;
            }
        }
    }
}


CNcbi2naRandomizer::~CNcbi2naRandomizer(void)
{
    return;
}


// Rewrite 'count' values starting at 'data' in place.
//
// Each input value is used directly as a row index into m_RandomTable, which
// has 16 rows; the caller is responsible for passing values in 0..15.
// 'pos' is the sequence position of data[0]; its low 6 bits select which of
// the pre-generated random values is used, so the result for a given
// position does not depend on how the sequence is split into caches.
void CNcbi2naRandomizer::RandomizeData(TData data,
                                       size_t count,
                                       TSeqPos pos)
{
    for (TData stop = data + count; data < stop; ++data, ++pos) {
        if ( m_RandomTable[(unsigned char)(*data)][0] ) {
            // Ambiguity, use random value
            *data = m_RandomTable[(unsigned char)(*data)]
                [(pos & kRandomizerPosMask) + 1];
        }
        else {
            // Normal base
            *data = m_RandomTable[(unsigned char)(*data)][1];
        }
    }
}


// Size, in values, of each of the two buffers owned by an iterator.
static const TSeqPos kCacheSize = 1024;


// Copy 'count' values starting at value index 'srcPos' of 'srcCont' to 'dst'.
//
// The bounds check treats 'srcCont' as holding one value per element.
// 'table' (may be null) and 'reverse' select which of the copy_8bit*
// helpers does the work; those helpers are defined outside this file.
// Throws CSeqVectorException(eOutOfRange) if srcPos + count overflows or
// exceeds the container size.
template<class DstIter, class SrcCont>
inline
void copy_8bit_any(DstIter dst, size_t count,
                   const SrcCont& srcCont, size_t srcPos,
                   const char* table, bool reverse)
{
    size_t endPos = srcPos + count;
    if ( endPos < srcPos || endPos > srcCont.size() ) {
        NCBI_THROW(CSeqVectorException, eOutOfRange,
                   "reference out of range of Seq-inst data");
    }
    if ( table ) {
        if ( reverse ) {
            copy_8bit_table_reverse(dst, count, srcCont, srcPos, table);
        }
        else {
            copy_8bit_table(dst, count, srcCont, srcPos, table);
        }
    }
    else {
        if ( reverse ) {
            copy_8bit_reverse(dst, count, srcCont, srcPos);
        }
        else {
            copy_8bit(dst, count, srcCont, srcPos);
        }
    }
}


// Same as copy_8bit_any, but the bounds check treats 'srcCont' as holding
// two values per element and the copy_4bit* helpers are used.
//
// The number of elements needed for 'endPos' values is rounded UP: an odd
// number of values still occupies a whole trailing element.  Rounding down
// would accept a request that reads one element past the end.
// Throws CSeqVectorException(eOutOfRange) on overflow or out-of-range access.
template<class DstIter, class SrcCont>
inline
void copy_4bit_any(DstIter dst, size_t count,
                   const SrcCont& srcCont, size_t srcPos,
                   const char* table, bool reverse)
{
    size_t endPos = srcPos + count;
    // endPos / 2 + endPos % 2 == ceil(endPos / 2) without risk of overflow.
    if ( endPos < srcPos || endPos / 2 + endPos % 2 > srcCont.size() ) {
        NCBI_THROW(CSeqVectorException, eOutOfRange,
                   "reference out of range of Seq-inst data");
    }
    if ( table ) {
        if ( reverse ) {
            copy_4bit_table_reverse(dst, count, srcCont, srcPos, table);
        }
        else {
            copy_4bit_table(dst, count, srcCont, srcPos, table);
        }
    }
    else {
        if ( reverse ) {
            copy_4bit_reverse(dst, count, srcCont, srcPos);
        }
        else {
            copy_4bit(dst, count, srcCont, srcPos);
        }
    }
}


// Same as copy_8bit_any, but the bounds check treats 'srcCont' as holding
// four values per element and the copy_2bit* helpers are used.
//
// The number of elements needed for 'endPos' values is rounded UP so that a
// partially used trailing element must actually be present.
// Throws CSeqVectorException(eOutOfRange) on overflow or out-of-range access.
template<class DstIter, class SrcCont>
inline
void copy_2bit_any(DstIter dst, size_t count,
                   const SrcCont& srcCont, size_t srcPos,
                   const char* table, bool reverse)
{
    size_t endPos = srcPos + count;
    // ceil(endPos / 4) computed without risk of overflow.
    if ( endPos < srcPos ||
         endPos / 4 + (endPos % 4 ? 1 : 0) > srcCont.size() ) {
        NCBI_THROW(CSeqVectorException, eOutOfRange,
                   "reference out of range of Seq-inst data");
    }
    if ( table ) {
        if ( reverse ) {
            copy_2bit_table_reverse(dst, count, srcCont, srcPos, table);
        }
        else {
            copy_2bit_table(dst, count, srcCont, srcPos, table);
        }
    }
    else {
        if ( reverse ) {
            copy_2bit_reverse(dst, count, srcCont, srcPos);
        }
        else {
            copy_2bit(dst, count, srcCont, srcPos);
        }
    }
}


// CSeqVector_CI::


// Default constructor: no strand/coding set and no buffers allocated.
CSeqVector_CI::CSeqVector_CI(void)
    : m_Strand(eNa_strand_unknown),
      m_Coding(CSeq_data::e_not_set),
      m_Cache(0),
      m_CachePos(0),
      m_CacheData(0),
      m_CacheEnd(0),
      m_BackupPos(0),
      m_BackupData(0),
      m_BackupEnd(0)
{
}


// Releases both buffers.
CSeqVector_CI::~CSeqVector_CI(void)
{
    x_DestroyCache();
}


// Copy constructor: start from the empty state, then reuse operator=.
// All buffer pointers are null before the assignment, so the cleanup in the
// catch block only ever deletes null or fully allocated buffers.
CSeqVector_CI::CSeqVector_CI(const CSeqVector_CI& sv_it)
    : m_Strand(eNa_strand_unknown),
      m_Coding(CSeq_data::e_not_set),
      m_Cache(0),
      m_CachePos(0),
      m_CacheData(0),
      m_CacheEnd(0),
      m_BackupPos(0),
      m_BackupData(0),
      m_BackupEnd(0),
      m_Randomizer(sv_it.m_Randomizer)
{
    try {
        *this = sv_it;
    }
    catch (...) {
        // The destructor will not run for a partially constructed object.
        x_DestroyCache();
        throw;
    }
}


// Construct an iterator over 'seq_vector' positioned at 'pos'.
// Takes the map, scope, strand and coding from the vector, then calls
// x_SetPos(pos); buffers are released again if that call throws.
CSeqVector_CI::CSeqVector_CI(const CSeqVector& seq_vector, TSeqPos pos)
    : m_SeqMap(seq_vector.m_SeqMap),
      m_Scope(seq_vector.m_Scope),
      m_Strand(seq_vector.m_Strand),
      m_Coding(seq_vector.m_Coding),
      m_Cache(0),
      m_CachePos(0),
      m_CacheData(0),
      m_CacheEnd(0),
      m_BackupPos(0),
      m_BackupData(0),
      m_BackupEnd(0)
{
    try {
        x_SetPos(pos);
    }
    catch (...) {
        // The destructor will not run for a partially constructed object.
        x_DestroyCache();
        throw;
    }
}


// Re-attach this iterator to 'seq_vector'.
// If a map was already attached, the segment iterator and both caches are
// reset first.  The cache position is then set to the size of the vector.
void CSeqVector_CI::x_SetVector(CSeqVector& seq_vector)
{
    if ( m_SeqMap ) {
        // reset old values
        m_Seg = CSeqMap_CI();
        x_ResetCache();
        x_ResetBackup();
    }

    m_SeqMap = seq_vector.m_SeqMap;
    m_Scope  = seq_vector.m_Scope;
    m_Strand = seq_vector.m_Strand;
    m_Coding = seq_vector.m_Coding;
    m_CachePos = seq_vector.size();
}


// Length of the attached sequence map, resolved in the current scope.
inline
TSeqPos CSeqVector_CI::x_GetSize(void) const
{
    return m_SeqMap->GetLength(m_Scope.GetScopeOrNull());
}


// Point m_Seg at the resolved segment found for 'pos' on the current strand.
inline
void CSeqVector_CI::x_InitSeg(TSeqPos pos)
{
    m_Seg = m_SeqMap->FindResolved(pos, m_Scope.GetScopeOrNull(), m_Strand);
}


// Switch the coding.  Does nothing if the coding is unchanged; otherwise
// both caches are reset and the iterator is re-positioned at the position
// it had before the change.
void CSeqVector_CI::SetCoding(TCoding coding)
{
    if ( m_Coding != coding ) {
        // Remember the position before the caches are reset.
        TSeqPos pos = GetPos();
        x_ResetCache();
        x_ResetBackup();
        m_Coding = coding;
        x_SetPos(pos);
    }
}


// Assignment: copy the state of 'sv_it', including the contents of its
// cache and backup cache (into buffers owned by this object).
// Self-assignment is a no-op.
CSeqVector_CI& CSeqVector_CI::operator=(const CSeqVector_CI& sv_it)
{
    if ( this == &sv_it ) {
        return *this;
    }

    m_Scope = sv_it.m_Scope;
    m_SeqMap = sv_it.m_SeqMap;
    m_Strand = sv_it.m_Strand;
    m_Coding = sv_it.GetCoding();
    m_Seg = sv_it.m_Seg;
    m_CachePos = sv_it.x_CachePos();
    m_Randomizer = sv_it.m_Randomizer;
    // copy cache if any
    size_t cache_size = sv_it.x_CacheSize();
    if ( cache_size ) {
        // Allocates the buffers on first use, otherwise resets the cache.
        x_InitializeCache();
        m_CacheEnd = m_CacheData + cache_size;
        // Keep the same offset inside the cache as the source iterator.
        m_Cache = m_CacheData + sv_it.x_CacheOffset();
        memcpy(m_CacheData, sv_it.m_CacheData, cache_size);

        // copy backup cache if any
        size_t backup_size = sv_it.x_BackupSize();
        if ( backup_size ) {
            m_BackupPos = sv_it.x_BackupPos();
            m_BackupEnd = m_BackupData + backup_size;
            memcpy(m_BackupData, sv_it.m_BackupData, backup_size);
        }
        else {
            x_ResetBackup();
        }
    }
    else {
        x_ResetCache();
        x_ResetBackup();
    }
    return *this;
}


// Free both buffers and null all pointers into them.
// The current position is stored in m_CachePos first, while the cache
// pointers are still intact (GetPos() is defined outside this section;
// presumably it depends on them).
void CSeqVector_CI::x_DestroyCache(void)
{
    m_CachePos = GetPos();

    delete[] m_CacheData;
    m_Cache = m_CacheData = m_CacheEnd = 0;

    delete[] m_BackupData;
    m_BackupData = m_BackupEnd = 0;
}


// Make sure the cache buffers exist and the cache is empty.
// When m_Cache is null, two buffers of kCacheSize chars are allocated; if
// the second allocation throws, the first is released before rethrowing.
// When m_Cache is already set, the existing cache is reset instead.
void CSeqVector_CI::x_InitializeCache(void)
{
    if ( !m_Cache ) {
        m_Cache = m_CacheEnd = m_CacheData = new char[kCacheSize];
        try {
            m_BackupEnd = m_BackupData = new char[kCacheSize];
        }
        catch (...) {
            x_DestroyCache();
            throw;
        }
    }
    else {
        x_ResetCache();
    }
}


// Set the cache to hold 'size' values (size must not exceed kCacheSize),
// allocating the buffers if necessary, and rewind m_Cache to its start.
inline
void CSeqVector_CI::x_ResizeCache(size_t size)
{
    _ASSERT(size <= kCacheSize);
    if ( !m_CacheData ) {
        x_InitializeCache();
    }
    m_CacheEnd = m_CacheData + size;
    m_Cache = m_CacheData;
}


// Fill the cache starting at 'pos' and going towards the end of the current
// segment.  'pos' must lie inside both the sequence and the current segment
// (checked by assertions only).  At most kCacheSize values are requested,
// and never more than remain in the segment.
void CSeqVector_CI::x_UpdateCacheUp(TSeqPos pos)
{
    _ASSERT(pos < x_GetSize());

    TSeqPos segEnd = m_Seg.GetEndPosition();
    _ASSERT(pos >= m_Seg.GetPosition() && pos < segEnd);

    TSeqPos cache_size = min(kCacheSize, segEnd - pos);
    x_FillCache(pos, cache_size);
    // The new cache starts exactly at 'pos'.
    m_Cache = m_CacheData;
    _ASSERT(GetPos() == pos);
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?