alnmap.cpp
来自「ncbi源码」· C++ 代码 · 共 1,302 行 · 第 1/3 页
CPP
1,302 行
/* * =========================================================================== * PRODUCTION $Log: alnmap.cpp,v $ * PRODUCTION Revision 1000.3 2004/06/01 19:40:42 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.48 * PRODUCTION * =========================================================================== *//* $Id: alnmap.cpp,v 1000.3 2004/06/01 19:40:42 gouriano Exp $* ===========================================================================** PUBLIC DOMAIN NOTICE* National Center for Biotechnology Information** This software/database is a "United States Government Work" under the* terms of the United States Copyright Act. It was written as part of* the author's official duties as a United States Government employee and* thus cannot be copyrighted. This software/database is freely available* to the public for use. The National Library of Medicine and the U.S.* Government have not placed any restriction on its use or reproduction.** Although all reasonable efforts have been taken to ensure the accuracy* and reliability of the software and data, the NLM and the U.S.* Government do not and cannot warrant the performance or results that* may be obtained by using this software or data. The NLM and the U.S.* Government disclaim all warranties, express or implied, including* warranties of performance, merchantability or fitness for any particular* purpose.** Please cite the author in any work or product based on this material.** ===========================================================================** Author: Kamen Todorov, NCBI** File Description:* Interface for examining alignments (of type Dense-seg)** ===========================================================================*/#include <ncbi_pch.hpp>#include <objtools/alnmgr/alnmap.hpp>BEGIN_NCBI_SCOPEBEGIN_objects_SCOPE // namespace ncbi::objects::void CAlnMap::x_Init(void){ m_SeqLeftSegs.resize(GetNumRows(), -1); m_SeqRightSegs.resize(GetNumRows(), -1);}void CAlnMap::x_CreateAlnStarts(void){ m_AlnStarts.clear(); m_AlnStarts.reserve(GetNumSegs()); int start = 0, len = 0; for (int i = 0; i < GetNumSegs(); ++i) { start += len; m_AlnStarts.push_back(start); len = m_Lens[i]; }}void CAlnMap::UnsetAnchor(void){ m_AlnSegIdx.clear(); m_NumSegWithOffsets.clear(); if (m_RawSegTypes) { delete m_RawSegTypes; m_RawSegTypes = 0; } m_Anchor = -1; // we must call this last, as it uses some internal shenanigans that // are affected by the reset above x_CreateAlnStarts();}void CAlnMap::SetAnchor(TNumrow anchor){ if (anchor == -1) { UnsetAnchor(); return; } if (anchor < 0 || anchor >= m_NumRows) { NCBI_THROW(CAlnException, eInvalidRow, "CAlnVec::SetAnchor(): " "Invalid row"); } m_AlnSegIdx.clear(); m_AlnStarts.clear(); m_NumSegWithOffsets.clear(); if (m_RawSegTypes) { delete m_RawSegTypes; m_RawSegTypes = 0; } int start = 0, len = 0, aln_seg = -1, offset = 0; m_Anchor = anchor; for (int i = 0, pos = m_Anchor; i < m_NumSegs; ++i, pos += m_NumRows) { if (m_Starts[pos] != -1) { ++aln_seg; offset = 0; m_AlnSegIdx.push_back(i); m_NumSegWithOffsets.push_back(CNumSegWithOffset(aln_seg)); start += len; m_AlnStarts.push_back(start); len = m_Lens[i]; } else { ++offset; m_NumSegWithOffsets.push_back(CNumSegWithOffset(aln_seg, offset)); } } if (!m_AlnSegIdx.size()) { NCBI_THROW(CAlnException, eInvalidDenseg, "CAlnVec::SetAnchor(): " "Invalid Dense-seg: No sequence on the anchor row"); }}CAlnMap::TSegTypeFlags CAlnMap::x_SetRawSegType(TNumrow row, TNumseg seg) const{ TSegTypeFlags flags = 0; TNumseg l_seg, r_seg, l_index, r_index, index; TSeqPos cont_next_start, cont_prev_stop; l_seg = r_seg = seg; l_index = r_index = index = seg * m_NumRows + row; TSignedSeqPos start = m_Starts[index]; // is it seq or gap? if (start >= 0) { flags |= fSeq; cont_next_start = start + x_GetLen(row, seg); cont_prev_stop = start; } // is it aligned to sequence on the anchor? if (IsSetAnchor()) { flags |= fNotAlignedToSeqOnAnchor; if (m_Starts[seg * m_NumRows + m_Anchor] >= 0) { flags &= ~(flags & fNotAlignedToSeqOnAnchor); } } // what's on the right? if (r_seg < m_NumSegs - 1) { flags |= fEndOnRight; } flags |= fNoSeqOnRight; while (++r_seg < m_NumSegs) { flags &= ~(flags & fEndOnRight); r_index += m_NumRows; if ((start = m_Starts[r_index]) >= 0) { if ((flags & fSeq) && (IsPositiveStrand(row) ? start != (TSignedSeqPos)cont_next_start : start + x_GetLen(row, r_seg) != cont_prev_stop)) { flags |= fUnalignedOnRight; } flags &= ~(flags & fNoSeqOnRight); break; } } // what's on the left? if (l_seg > 0) { flags |= fEndOnLeft; } flags |= fNoSeqOnLeft; while (--l_seg >= 0) { flags &= ~(flags & fEndOnLeft); l_index -= m_NumRows; if ((start = m_Starts[l_index]) >= 0) { if ((flags & fSeq) && (IsPositiveStrand(row) ? start + x_GetLen(row, l_seg) != cont_prev_stop : start != (TSignedSeqPos)cont_next_start)) { flags |= fUnalignedOnLeft; } flags &= ~(flags & fNoSeqOnLeft); break; } } // add to cache if ( !m_RawSegTypes ) { // Using kZero for 0 works around a bug in Compaq's C++ compiler. static const TSegTypeFlags kZero = 0; m_RawSegTypes = new vector<TSegTypeFlags> (m_NumRows * m_NumSegs, kZero); } (*m_RawSegTypes)[row + m_NumRows * seg] = flags | fTypeIsSet; return flags;}CAlnMap::TNumseg CAlnMap::GetSeg(TSeqPos aln_pos) const{ TNumseg btm, top, mid; btm = 0; top = m_AlnStarts.size() - 1; if (aln_pos > m_AlnStarts[top] + m_Lens[x_GetRawSegFromSeg(top)] - 1) return -1; // out of range while (btm < top) { mid = (top + btm) / 2; if (m_AlnStarts[mid] == (TSignedSeqPos)aln_pos) { return mid; } if (m_AlnStarts[mid + 1] <= (TSignedSeqPos)aln_pos) { btm = mid + 1; } else { top = mid; } } return top;}CAlnMap::TNumsegCAlnMap::GetRawSeg(TNumrow row, TSeqPos seq_pos, ESearchDirection dir, bool try_reverse_dir) const{ TSignedSeqPos start = -1, sseq_pos = seq_pos; TNumseg btm, top, mid, cur, last, cur_top, cur_btm; btm = cur_btm = 0; cur = top = last = cur_top = m_NumSegs - 1; bool plus = IsPositiveStrand(row); // if out-of-range, return either -1 or the closest seg in dir direction if (sseq_pos < GetSeqStart(row)) { if (dir == eNone) { return -1; } else if (dir == eForward || dir == (plus ? eRight : eLeft) || try_reverse_dir) { TNumseg seg; if (plus) { seg = -1; while (++seg < m_NumSegs) { if (m_Starts[seg * m_NumRows + row] >= 0) { return seg; } } } else { seg = m_NumSegs; while (seg--) { if (m_Starts[seg * m_NumRows + row] >= 0) { return seg; } } } } } else if (sseq_pos > GetSeqStop(row)) { if (dir == eNone) { return -1; } else if (dir == eBackwards || dir == (plus ? eLeft : eRight) || try_reverse_dir) { TNumseg seg; if (plus) { seg = m_NumSegs; while (seg--) { if (m_Starts[seg * m_NumRows + row] >= 0) { return seg; } } } else { seg = -1; while (++seg < m_NumSegs) { if (m_Starts[seg * m_NumRows + row] >= 0) { return seg; } } } } } // main loop while (btm <= top) { cur = mid = (top + btm) / 2; while (cur <= top && (start = m_Starts[(plus ? cur : last - cur) * m_NumRows + row]) < 0) { ++cur; } if (cur <= top && start >= 0) { if (sseq_pos >= start && seq_pos < start + x_GetLen(row, plus ? cur : last - cur)) { return (plus ? cur : last - cur); // found } if (sseq_pos > start) { btm = cur + 1; cur_btm = cur; } else { top = mid - 1; cur_top = cur; } continue; } cur = mid-1; while (cur >= btm && (start = m_Starts[(plus ? cur : last - cur) * m_NumRows + row]) < 0) { --cur; } if (cur >= btm && start >= 0) { if (sseq_pos >= start && seq_pos < start + x_GetLen(row, plus ? cur : last - cur)) { return (plus ? cur : last - cur); // found } if (sseq_pos > start) { btm = mid + 1; cur_btm = cur; } else { top = cur - 1; cur_top = cur; } continue; } // if we get here, seq_pos falls into an unaligned region // return either -1 or the closest segment in dir direction if (dir == eNone) { return -1; } else if (dir == eBackwards || dir == (plus ? eLeft : eRight)) { return (plus ? cur_btm : last - cur_btm); } else if (dir == eForward || dir == (plus ? eRight : eLeft)) { return (plus ? cur_top : last - cur_top); } } return -1; /* No match found */} TSignedSeqPos CAlnMap::GetAlnPosFromSeqPos(TNumrow row, TSeqPos seq_pos, ESearchDirection dir, bool try_reverse_dir) const{ TNumseg raw_seg = GetRawSeg(row, seq_pos, dir, try_reverse_dir); if (raw_seg < 0) { // out of seq range return -1; } TSeqPos start = m_Starts[raw_seg * m_NumRows + row]; TSeqPos len = x_GetLen(row, raw_seg); TSeqPos stop = start + len -1; bool plus = IsPositiveStrand(row); CNumSegWithOffset seg = x_GetSegFromRawSeg(raw_seg); if (dir == eNone) { if (seg.GetOffset()) { // seq_pos is within an insert return -1; } } else { // check if within unaligned region // if seq_pos outside the segment returned by GetRawSeg // then return the edge alnpos if ((plus ? seq_pos < start : seq_pos > stop)) { return GetAlnStart(seg.GetAlnSeg()); } if ((plus ? seq_pos > stop : seq_pos < start)) { return GetAlnStop(seg.GetAlnSeg()); } // check if within an insert if (seg.GetOffset() && (dir == eRight || dir == (plus ? eForward : eBackwards))) { // seek the nearest alnpos on the right if (seg.GetAlnSeg() < GetNumSegs() - 1) { return GetAlnStart(seg.GetAlnSeg() + 1); } else if (try_reverse_dir) { return GetAlnStop(seg.GetAlnSeg()); } else { return -1; } } if (seg.GetOffset() && (dir == eLeft || dir == (plus ? eBackwards : eForward))) { // seek the nearest alnpos on left if (seg.GetAlnSeg() >= 0) { return GetAlnStop(seg.GetAlnSeg()); } else if (try_reverse_dir) { return GetAlnStart(seg.GetAlnSeg() + 1); } else { return -1; } } } // main case: seq_pos is within an alnseg //assert(seq_pos >= start && seq_pos <= stop); TSeqPos delta = (seq_pos - start) / GetWidth(row); return m_AlnStarts[seg.GetAlnSeg()] + (plus ? delta : m_Lens[raw_seg] - 1 - delta);}TSignedSeqPos CAlnMap::GetSeqPosFromAlnPos(TNumrow for_row, TSeqPos aln_pos, ESearchDirection dir, bool try_reverse_dir) const{ if (aln_pos > GetAlnStop()) { aln_pos = GetAlnStop(); // out-of-range adjustment
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?