alnvec.cpp

来自「ncbi源码」· C++ 代码 · 共 1,047 行 · 第 1/3 页

CPP
1,047
字号
/* * =========================================================================== * PRODUCTION $Log: alnvec.cpp,v $ * PRODUCTION Revision 1000.2  2004/06/01 19:40:49  gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.59 * PRODUCTION * =========================================================================== *//*  $Id: alnvec.cpp,v 1000.2 2004/06/01 19:40:49 gouriano Exp $* ===========================================================================**                            PUBLIC DOMAIN NOTICE*               National Center for Biotechnology Information**  This software/database is a "United States Government Work" under the*  terms of the United States Copyright Act.  It was written as part of*  the author's official duties as a United States Government employee and*  thus cannot be copyrighted.  This software/database is freely available*  to the public for use. The National Library of Medicine and the U.S.*  Government have not placed any restriction on its use or reproduction.**  Although all reasonable efforts have been taken to ensure the accuracy*  and reliability of the software and data, the NLM and the U.S.*  Government do not and cannot warrant the performance or results that*  may be obtained by using this software or data. The NLM and the U.S.*  Government disclaim all warranties, express or implied, including*  warranties of performance, merchantability or fitness for any particular*  purpose.**  Please cite the author in any work or product based on this material.** ===========================================================================** Author:  Kamen Todorov, NCBI** File Description:*   Access to the actual aligned residues** ===========================================================================*/#include <ncbi_pch.hpp>#include <objtools/alnmgr/alnvec.hpp>// Objects includes#include <objects/seq/Bioseq.hpp>#include <objects/seq/IUPACna.hpp>#include <objects/seq/Seq_descr.hpp>#include <objects/seq/Seqdesc.hpp>#include <objects/seq/Seq_inst.hpp>#include <objects/seqset/Seq_entry.hpp>#include <objects/seqloc/Seq_id.hpp>#include <objects/seqloc/Seq_interval.hpp>#include <objects/seqloc/Seq_loc.hpp>#include <objects/general/Object_id.hpp>#include <objects/seqfeat/Genetic_code_table.hpp>// Object Manager includes#include <objmgr/scope.hpp>#include <objmgr/seq_vector.hpp>#include <util/tables/raw_scoremat.h>BEGIN_NCBI_SCOPEBEGIN_objects_SCOPE // namespace ncbi::objects::CAlnVec::CAlnVec(const CDense_seg& ds, CScope& scope)     : CAlnMap(ds),      m_Scope(&scope),      m_set_GapChar(false),      m_set_EndChar(false){}CAlnVec::CAlnVec(const CDense_seg& ds, TNumrow anchor, CScope& scope)    : CAlnMap(ds, anchor),      m_Scope(&scope),      m_set_GapChar(false),      m_set_EndChar(false){}CAlnVec::~CAlnVec(void){}const CBioseq_Handle& CAlnVec::GetBioseqHandle(TNumrow row) const{    TBioseqHandleCache::iterator i = m_BioseqHandlesCache.find(row);        if (i != m_BioseqHandlesCache.end()) {        return i->second;    } else {        CBioseq_Handle bioseq_handle =             GetScope().GetBioseqHandle(GetSeqId(row));        if (bioseq_handle) {            return m_BioseqHandlesCache[row] = bioseq_handle;        } else {            string errstr = string("CAlnVec::GetBioseqHandle(): ")                 + "Seq-id cannot be resolved: "                + GetSeqId(row).AsFastaString();                        NCBI_THROW(CAlnException, eInvalidSeqId, errstr);        }    }}CSeqVector& CAlnVec::x_GetSeqVector(TNumrow row) const{    TSeqVectorCache::iterator iter = m_SeqVectorCache.find(row);    if (iter != m_SeqVectorCache.end()) {        return *(iter->second);    } else {        CSeqVector vec = GetBioseqHandle(row).GetSeqVector            (CBioseq_Handle::eCoding_Iupac,             IsPositiveStrand(row) ?              CBioseq_Handle::eStrand_Plus :             CBioseq_Handle::eStrand_Minus);        CRef<CSeqVector> seq_vec(new CSeqVector(vec));        return *(m_SeqVectorCache[row] = seq_vec);    }}string& CAlnVec::GetAlnSeqString(string& buffer,                                 TNumrow row,                                 const TSignedRange& aln_rng) const{    string buff;    buffer.erase();    CSeqVector& seq_vec      = x_GetSeqVector(row);    TSeqPos     seq_vec_size = seq_vec.size();        // get the chunks which are aligned to seq on anchor    CRef<CAlnMap::CAlnChunkVec> chunk_vec =         GetAlnChunks(row, aln_rng, fSkipInserts | fSkipUnalignedGaps);        // for each chunk    for (int i=0; i<chunk_vec->size(); i++) {        CConstRef<CAlnMap::CAlnChunk> chunk = (*chunk_vec)[i];                        if (chunk->GetType() & fSeq) {            // add the sequence string            if (IsPositiveStrand(row)) {                seq_vec.GetSeqData(chunk->GetRange().GetFrom(),                                   chunk->GetRange().GetTo() + 1,                                   buff);            } else {                seq_vec.GetSeqData(seq_vec_size - chunk->GetRange().GetTo() - 1,                                   seq_vec_size - chunk->GetRange().GetFrom(),                                   buff);            }            buffer += buff;        } else {            // add appropriate number of gap/end chars            const int n = chunk->GetAlnRange().GetLength();            char* ch_buff = new char[n+1];            char fill_ch;            if (chunk->GetType() & fNoSeqOnLeft  ||                chunk->GetType() & fNoSeqOnRight) {                fill_ch = GetEndChar();            } else {                fill_ch = GetGapChar(row);            }            memset(ch_buff, fill_ch, n);            ch_buff[n] = 0;            buffer += ch_buff;            delete[] ch_buff;        }    }    return buffer;}string& CAlnVec::GetWholeAlnSeqString(TNumrow       row,                                      string&       buffer,                                      TSeqPosList * insert_aln_starts,                                      TSeqPosList * insert_starts,                                      TSeqPosList * insert_lens,                                      unsigned int  scrn_width,                                      TSeqPosList * scrn_lefts,                                      TSeqPosList * scrn_rights) const{    TSeqPos       aln_pos = 0,        len = 0,        curr_pos = 0,        anchor_pos = 0,        scrn_pos = 0,        prev_len = 0,        ttl_len = 0;    TSignedSeqPos start = -1,        stop = -1,        scrn_lft_seq_pos = -1,        scrn_rgt_seq_pos = -1,        prev_aln_pos = -1,        prev_start = -1;    TNumseg       seg;    int           pos, nscrns, delta;        TSeqPos aln_len = GetAlnStop() + 1;    bool anchored = IsSetAnchor();    bool plus     = IsPositiveStrand(row);    int  width    = GetWidth(row);    scrn_width *= width;    const bool record_inserts = insert_starts && insert_lens;    const bool record_coords  = scrn_width && scrn_lefts && scrn_rights;    // allocate space for the row    char* c_buff = new char[aln_len + 1];    char* c_buff_ptr = c_buff;    string buff;        const TNumseg& left_seg = x_GetSeqLeftSeg(row);    const TNumseg& right_seg = x_GetSeqRightSeg(row);    // loop through all segments    for (seg = 0, pos = row, aln_pos = 0, anchor_pos = m_Anchor;         seg < m_NumSegs;         ++seg, pos += m_NumRows, anchor_pos += m_NumRows) {                const TSeqPos& seg_len = m_Lens[seg];        start = m_Starts[pos];        len = seg_len * width;        if (anchored  &&  m_Starts[anchor_pos] < 0) {            if (start >= 0) {                // record the insert if requested                if (record_inserts) {                    if (prev_aln_pos == (aln_pos / width)  &&                        start == (plus ? prev_start + prev_len :                                  prev_start - len)) {                        // consolidate the adjacent inserts                        ttl_len += len;                        insert_lens->pop_back();                        insert_lens->push_back(ttl_len);                        if (!plus) {                            insert_starts->pop_back();                            insert_starts->push_back(start);                        }                    } else {                        prev_aln_pos = aln_pos / width;                        ttl_len = len;                        insert_starts->push_back(start);                        insert_aln_starts->push_back(prev_aln_pos);                        insert_lens->push_back(len);                    }                    prev_start = start;                    prev_len = len;		}            }        } else {            if (start >= 0) {                stop = start + len - 1;                // add regular sequence to buffer                GetSeqString(buff, row, start, stop);                memcpy(c_buff_ptr, buff.c_str(), seg_len);                c_buff_ptr += seg_len;                                // take care of coords if necessary                if (record_coords) {                    if (scrn_lft_seq_pos < 0) {                        scrn_lft_seq_pos = plus ? start : stop;                        if (scrn_rgt_seq_pos < 0) {                            scrn_rgt_seq_pos = scrn_lft_seq_pos;                        }                    }                    // previous scrns                    nscrns = (aln_pos - scrn_pos) / scrn_width;                    for (int i = 0; i < nscrns; i++) {                        scrn_lefts->push_back(scrn_lft_seq_pos);                        scrn_rights->push_back(scrn_rgt_seq_pos);                        if (i == 0) {                            scrn_lft_seq_pos = plus ? start : stop;                        }                        scrn_pos += scrn_width;                    }                    if (nscrns > 0) {                        scrn_lft_seq_pos = plus ? start : stop;                    }                    // current scrns                    nscrns = (aln_pos + len - scrn_pos) / scrn_width;                    curr_pos = aln_pos;                    for (int i = 0; i < nscrns; i++) {                        delta = (plus ?                                 scrn_width - (curr_pos - scrn_pos) :                                 curr_pos - scrn_pos - scrn_width);                                                scrn_lefts->push_back(scrn_lft_seq_pos);                        if (plus ?                            scrn_lft_seq_pos < start :                            scrn_lft_seq_pos > stop) {                            scrn_lft_seq_pos = (plus ? start : stop) +                                delta;                            scrn_rgt_seq_pos = scrn_lft_seq_pos +                                (plus ? -1 : 1);                        } else {                            scrn_rgt_seq_pos = scrn_lft_seq_pos + (plus ? -1 : 1)                                + delta;                            scrn_lft_seq_pos += delta;                        }                        if (seg == left_seg  &&                            scrn_lft_seq_pos == scrn_rgt_seq_pos) {                            if (plus) {                                scrn_rgt_seq_pos--;                            } else {                                scrn_rgt_seq_pos++;                            }                        }                        scrn_rights->push_back(scrn_rgt_seq_pos);                        curr_pos = scrn_pos += scrn_width;                    }                    if (aln_pos + len <= scrn_pos) {                        scrn_lft_seq_pos = -1; // reset                    }                    scrn_rgt_seq_pos = plus ? stop : start;                }            } else {                // add appropriate number of gap/end chars                                char* ch_buff = new char[seg_len + 1];                char fill_ch;                                if (seg < left_seg  ||  seg > right_seg) {                    fill_ch = GetEndChar();                } else {                    fill_ch = GetGapChar(row);                }                                memset(ch_buff, fill_ch, seg_len);                ch_buff[seg_len] = 0;                memcpy(c_buff_ptr, ch_buff, seg_len);                c_buff_ptr += seg_len;                delete[] ch_buff;            }            aln_pos += len;        }    }

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?