alnvec.cpp
来自「ncbi源码」· C++ 代码 · 共 1,047 行 · 第 1/3 页
CPP
1,047 行
/* * =========================================================================== * PRODUCTION $Log: alnvec.cpp,v $ * PRODUCTION Revision 1000.2 2004/06/01 19:40:49 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.59 * PRODUCTION * =========================================================================== *//* $Id: alnvec.cpp,v 1000.2 2004/06/01 19:40:49 gouriano Exp $* ===========================================================================** PUBLIC DOMAIN NOTICE* National Center for Biotechnology Information** This software/database is a "United States Government Work" under the* terms of the United States Copyright Act. It was written as part of* the author's official duties as a United States Government employee and* thus cannot be copyrighted. This software/database is freely available* to the public for use. The National Library of Medicine and the U.S.* Government have not placed any restriction on its use or reproduction.** Although all reasonable efforts have been taken to ensure the accuracy* and reliability of the software and data, the NLM and the U.S.* Government do not and cannot warrant the performance or results that* may be obtained by using this software or data. The NLM and the U.S.* Government disclaim all warranties, express or implied, including* warranties of performance, merchantability or fitness for any particular* purpose.** Please cite the author in any work or product based on this material.** ===========================================================================** Author: Kamen Todorov, NCBI** File Description:* Access to the actual aligned residues** ===========================================================================*/#include <ncbi_pch.hpp>#include <objtools/alnmgr/alnvec.hpp>// Objects includes#include <objects/seq/Bioseq.hpp>#include <objects/seq/IUPACna.hpp>#include <objects/seq/Seq_descr.hpp>#include <objects/seq/Seqdesc.hpp>#include <objects/seq/Seq_inst.hpp>#include <objects/seqset/Seq_entry.hpp>#include <objects/seqloc/Seq_id.hpp>#include <objects/seqloc/Seq_interval.hpp>#include <objects/seqloc/Seq_loc.hpp>#include <objects/general/Object_id.hpp>#include <objects/seqfeat/Genetic_code_table.hpp>// Object Manager includes#include <objmgr/scope.hpp>#include <objmgr/seq_vector.hpp>#include <util/tables/raw_scoremat.h>BEGIN_NCBI_SCOPEBEGIN_objects_SCOPE // namespace ncbi::objects::CAlnVec::CAlnVec(const CDense_seg& ds, CScope& scope) : CAlnMap(ds), m_Scope(&scope), m_set_GapChar(false), m_set_EndChar(false){}CAlnVec::CAlnVec(const CDense_seg& ds, TNumrow anchor, CScope& scope) : CAlnMap(ds, anchor), m_Scope(&scope), m_set_GapChar(false), m_set_EndChar(false){}CAlnVec::~CAlnVec(void){}const CBioseq_Handle& CAlnVec::GetBioseqHandle(TNumrow row) const{ TBioseqHandleCache::iterator i = m_BioseqHandlesCache.find(row); if (i != m_BioseqHandlesCache.end()) { return i->second; } else { CBioseq_Handle bioseq_handle = GetScope().GetBioseqHandle(GetSeqId(row)); if (bioseq_handle) { return m_BioseqHandlesCache[row] = bioseq_handle; } else { string errstr = string("CAlnVec::GetBioseqHandle(): ") + "Seq-id cannot be resolved: " + GetSeqId(row).AsFastaString(); NCBI_THROW(CAlnException, eInvalidSeqId, errstr); } }}CSeqVector& CAlnVec::x_GetSeqVector(TNumrow row) const{ TSeqVectorCache::iterator iter = m_SeqVectorCache.find(row); if (iter != m_SeqVectorCache.end()) { return *(iter->second); } else { CSeqVector vec = GetBioseqHandle(row).GetSeqVector (CBioseq_Handle::eCoding_Iupac, IsPositiveStrand(row) ? CBioseq_Handle::eStrand_Plus : CBioseq_Handle::eStrand_Minus); CRef<CSeqVector> seq_vec(new CSeqVector(vec)); return *(m_SeqVectorCache[row] = seq_vec); }}string& CAlnVec::GetAlnSeqString(string& buffer, TNumrow row, const TSignedRange& aln_rng) const{ string buff; buffer.erase(); CSeqVector& seq_vec = x_GetSeqVector(row); TSeqPos seq_vec_size = seq_vec.size(); // get the chunks which are aligned to seq on anchor CRef<CAlnMap::CAlnChunkVec> chunk_vec = GetAlnChunks(row, aln_rng, fSkipInserts | fSkipUnalignedGaps); // for each chunk for (int i=0; i<chunk_vec->size(); i++) { CConstRef<CAlnMap::CAlnChunk> chunk = (*chunk_vec)[i]; if (chunk->GetType() & fSeq) { // add the sequence string if (IsPositiveStrand(row)) { seq_vec.GetSeqData(chunk->GetRange().GetFrom(), chunk->GetRange().GetTo() + 1, buff); } else { seq_vec.GetSeqData(seq_vec_size - chunk->GetRange().GetTo() - 1, seq_vec_size - chunk->GetRange().GetFrom(), buff); } buffer += buff; } else { // add appropriate number of gap/end chars const int n = chunk->GetAlnRange().GetLength(); char* ch_buff = new char[n+1]; char fill_ch; if (chunk->GetType() & fNoSeqOnLeft || chunk->GetType() & fNoSeqOnRight) { fill_ch = GetEndChar(); } else { fill_ch = GetGapChar(row); } memset(ch_buff, fill_ch, n); ch_buff[n] = 0; buffer += ch_buff; delete[] ch_buff; } } return buffer;}string& CAlnVec::GetWholeAlnSeqString(TNumrow row, string& buffer, TSeqPosList * insert_aln_starts, TSeqPosList * insert_starts, TSeqPosList * insert_lens, unsigned int scrn_width, TSeqPosList * scrn_lefts, TSeqPosList * scrn_rights) const{ TSeqPos aln_pos = 0, len = 0, curr_pos = 0, anchor_pos = 0, scrn_pos = 0, prev_len = 0, ttl_len = 0; TSignedSeqPos start = -1, stop = -1, scrn_lft_seq_pos = -1, scrn_rgt_seq_pos = -1, prev_aln_pos = -1, prev_start = -1; TNumseg seg; int pos, nscrns, delta; TSeqPos aln_len = GetAlnStop() + 1; bool anchored = IsSetAnchor(); bool plus = IsPositiveStrand(row); int width = GetWidth(row); scrn_width *= width; const bool record_inserts = insert_starts && insert_lens; const bool record_coords = scrn_width && scrn_lefts && scrn_rights; // allocate space for the row char* c_buff = new char[aln_len + 1]; char* c_buff_ptr = c_buff; string buff; const TNumseg& left_seg = x_GetSeqLeftSeg(row); const TNumseg& right_seg = x_GetSeqRightSeg(row); // loop through all segments for (seg = 0, pos = row, aln_pos = 0, anchor_pos = m_Anchor; seg < m_NumSegs; ++seg, pos += m_NumRows, anchor_pos += m_NumRows) { const TSeqPos& seg_len = m_Lens[seg]; start = m_Starts[pos]; len = seg_len * width; if (anchored && m_Starts[anchor_pos] < 0) { if (start >= 0) { // record the insert if requested if (record_inserts) { if (prev_aln_pos == (aln_pos / width) && start == (plus ? prev_start + prev_len : prev_start - len)) { // consolidate the adjacent inserts ttl_len += len; insert_lens->pop_back(); insert_lens->push_back(ttl_len); if (!plus) { insert_starts->pop_back(); insert_starts->push_back(start); } } else { prev_aln_pos = aln_pos / width; ttl_len = len; insert_starts->push_back(start); insert_aln_starts->push_back(prev_aln_pos); insert_lens->push_back(len); } prev_start = start; prev_len = len; } } } else { if (start >= 0) { stop = start + len - 1; // add regular sequence to buffer GetSeqString(buff, row, start, stop); memcpy(c_buff_ptr, buff.c_str(), seg_len); c_buff_ptr += seg_len; // take care of coords if necessary if (record_coords) { if (scrn_lft_seq_pos < 0) { scrn_lft_seq_pos = plus ? start : stop; if (scrn_rgt_seq_pos < 0) { scrn_rgt_seq_pos = scrn_lft_seq_pos; } } // previous scrns nscrns = (aln_pos - scrn_pos) / scrn_width; for (int i = 0; i < nscrns; i++) { scrn_lefts->push_back(scrn_lft_seq_pos); scrn_rights->push_back(scrn_rgt_seq_pos); if (i == 0) { scrn_lft_seq_pos = plus ? start : stop; } scrn_pos += scrn_width; } if (nscrns > 0) { scrn_lft_seq_pos = plus ? start : stop; } // current scrns nscrns = (aln_pos + len - scrn_pos) / scrn_width; curr_pos = aln_pos; for (int i = 0; i < nscrns; i++) { delta = (plus ? scrn_width - (curr_pos - scrn_pos) : curr_pos - scrn_pos - scrn_width); scrn_lefts->push_back(scrn_lft_seq_pos); if (plus ? scrn_lft_seq_pos < start : scrn_lft_seq_pos > stop) { scrn_lft_seq_pos = (plus ? start : stop) + delta; scrn_rgt_seq_pos = scrn_lft_seq_pos + (plus ? -1 : 1); } else { scrn_rgt_seq_pos = scrn_lft_seq_pos + (plus ? -1 : 1) + delta; scrn_lft_seq_pos += delta; } if (seg == left_seg && scrn_lft_seq_pos == scrn_rgt_seq_pos) { if (plus) { scrn_rgt_seq_pos--; } else { scrn_rgt_seq_pos++; } } scrn_rights->push_back(scrn_rgt_seq_pos); curr_pos = scrn_pos += scrn_width; } if (aln_pos + len <= scrn_pos) { scrn_lft_seq_pos = -1; // reset } scrn_rgt_seq_pos = plus ? stop : start; } } else { // add appropriate number of gap/end chars char* ch_buff = new char[seg_len + 1]; char fill_ch; if (seg < left_seg || seg > right_seg) { fill_ch = GetEndChar(); } else { fill_ch = GetGapChar(row); } memset(ch_buff, fill_ch, seg_len); ch_buff[seg_len] = 0; memcpy(c_buff_ptr, ch_buff, seg_len); c_buff_ptr += seg_len; delete[] ch_buff; } aln_pos += len; } }
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?