alnmap.cpp

来自「ncbi源码」· C++ 代码 · 共 1,302 行 · 第 1/3 页

CPP
1,302
字号
/* * =========================================================================== * PRODUCTION $Log: alnmap.cpp,v $ * PRODUCTION Revision 1000.3  2004/06/01 19:40:42  gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.48 * PRODUCTION * =========================================================================== *//*  $Id: alnmap.cpp,v 1000.3 2004/06/01 19:40:42 gouriano Exp $* ===========================================================================**                            PUBLIC DOMAIN NOTICE*               National Center for Biotechnology Information**  This software/database is a "United States Government Work" under the*  terms of the United States Copyright Act.  It was written as part of*  the author's official duties as a United States Government employee and*  thus cannot be copyrighted.  This software/database is freely available*  to the public for use. The National Library of Medicine and the U.S.*  Government have not placed any restriction on its use or reproduction.**  Although all reasonable efforts have been taken to ensure the accuracy*  and reliability of the software and data, the NLM and the U.S.*  Government do not and cannot warrant the performance or results that*  may be obtained by using this software or data. The NLM and the U.S.*  Government disclaim all warranties, express or implied, including*  warranties of performance, merchantability or fitness for any particular*  purpose.**  Please cite the author in any work or product based on this material.** ===========================================================================** Author:  Kamen Todorov, NCBI** File Description:*   Interface for examining alignments (of type Dense-seg)** ===========================================================================*/#include <ncbi_pch.hpp>#include <objtools/alnmgr/alnmap.hpp>BEGIN_NCBI_SCOPEBEGIN_objects_SCOPE // namespace ncbi::objects::void CAlnMap::x_Init(void){    m_SeqLeftSegs.resize(GetNumRows(), -1);    m_SeqRightSegs.resize(GetNumRows(), -1);}void CAlnMap::x_CreateAlnStarts(void){    m_AlnStarts.clear();    m_AlnStarts.reserve(GetNumSegs());        int start = 0, len = 0;    for (int i = 0;  i < GetNumSegs();  ++i) {        start += len;        m_AlnStarts.push_back(start);        len = m_Lens[i];    }}void CAlnMap::UnsetAnchor(void){    m_AlnSegIdx.clear();    m_NumSegWithOffsets.clear();    if (m_RawSegTypes) {        delete m_RawSegTypes;        m_RawSegTypes = 0;    }    m_Anchor = -1;    // we must call this last, as it uses some internal shenanigans that    // are affected by the reset above    x_CreateAlnStarts();}void CAlnMap::SetAnchor(TNumrow anchor){    if (anchor == -1) {        UnsetAnchor();        return;    }    if (anchor < 0  ||  anchor >= m_NumRows) {        NCBI_THROW(CAlnException, eInvalidRow,                   "CAlnVec::SetAnchor(): "                   "Invalid row");    }    m_AlnSegIdx.clear();    m_AlnStarts.clear();    m_NumSegWithOffsets.clear();    if (m_RawSegTypes) {        delete m_RawSegTypes;        m_RawSegTypes = 0;    }    int start = 0, len = 0, aln_seg = -1, offset = 0;        m_Anchor = anchor;    for (int i = 0, pos = m_Anchor;  i < m_NumSegs;         ++i, pos += m_NumRows) {        if (m_Starts[pos] != -1) {            ++aln_seg;            offset = 0;            m_AlnSegIdx.push_back(i);            m_NumSegWithOffsets.push_back(CNumSegWithOffset(aln_seg));            start += len;            m_AlnStarts.push_back(start);            len = m_Lens[i];        } else {            ++offset;            m_NumSegWithOffsets.push_back(CNumSegWithOffset(aln_seg, offset));        }    }    if (!m_AlnSegIdx.size()) {        NCBI_THROW(CAlnException, eInvalidDenseg,                   "CAlnVec::SetAnchor(): "                   "Invalid Dense-seg: No sequence on the anchor row");    }}CAlnMap::TSegTypeFlags CAlnMap::x_SetRawSegType(TNumrow row, TNumseg seg) const{    TSegTypeFlags flags = 0;    TNumseg       l_seg, r_seg, l_index, r_index, index;    TSeqPos       cont_next_start, cont_prev_stop;    l_seg = r_seg = seg;    l_index = r_index = index = seg * m_NumRows + row;    TSignedSeqPos start = m_Starts[index];    // is it seq or gap?    if (start >= 0) {        flags |= fSeq;        cont_next_start = start + x_GetLen(row, seg);        cont_prev_stop  = start;    }    // is it aligned to sequence on the anchor?    if (IsSetAnchor()) {        flags |= fNotAlignedToSeqOnAnchor;        if (m_Starts[seg * m_NumRows + m_Anchor] >= 0) {            flags &= ~(flags & fNotAlignedToSeqOnAnchor);        }    }    // what's on the right?    if (r_seg < m_NumSegs - 1) {        flags |= fEndOnRight;    }    flags |= fNoSeqOnRight;    while (++r_seg < m_NumSegs) {        flags &= ~(flags & fEndOnRight);        r_index += m_NumRows;        if ((start = m_Starts[r_index]) >= 0) {            if ((flags & fSeq) &&                 (IsPositiveStrand(row) ?                 start != (TSignedSeqPos)cont_next_start :                 start + x_GetLen(row, r_seg) != cont_prev_stop)) {                flags |= fUnalignedOnRight;            }            flags &= ~(flags & fNoSeqOnRight);            break;        }    }    // what's on the left?    if (l_seg > 0) {        flags |= fEndOnLeft;    }    flags |= fNoSeqOnLeft;    while (--l_seg >= 0) {        flags &= ~(flags & fEndOnLeft);        l_index -= m_NumRows;        if ((start = m_Starts[l_index]) >= 0) {            if ((flags & fSeq) &&                 (IsPositiveStrand(row) ?                 start + x_GetLen(row, l_seg) != cont_prev_stop :                 start != (TSignedSeqPos)cont_next_start)) {                flags |= fUnalignedOnLeft;            }            flags &= ~(flags & fNoSeqOnLeft);            break;        }    }            // add to cache    if ( !m_RawSegTypes ) {        // Using kZero for 0 works around a bug in Compaq's C++ compiler.        static const TSegTypeFlags kZero = 0;        m_RawSegTypes = new vector<TSegTypeFlags>            (m_NumRows * m_NumSegs, kZero);    }    (*m_RawSegTypes)[row + m_NumRows * seg] = flags | fTypeIsSet;    return flags;}CAlnMap::TNumseg CAlnMap::GetSeg(TSeqPos aln_pos) const{    TNumseg btm, top, mid;    btm = 0;    top = m_AlnStarts.size() - 1;    if (aln_pos > m_AlnStarts[top] +        m_Lens[x_GetRawSegFromSeg(top)] - 1)         return -1; // out of range    while (btm < top) {        mid = (top + btm) / 2;        if (m_AlnStarts[mid] == (TSignedSeqPos)aln_pos) {            return mid;        }        if (m_AlnStarts[mid + 1] <= (TSignedSeqPos)aln_pos) {            btm = mid + 1;         } else {            top = mid;        }    }     return top;}CAlnMap::TNumsegCAlnMap::GetRawSeg(TNumrow row, TSeqPos seq_pos,                   ESearchDirection dir, bool try_reverse_dir) const{    TSignedSeqPos start = -1, sseq_pos = seq_pos;    TNumseg       btm, top, mid, cur, last, cur_top, cur_btm;    btm = cur_btm = 0; cur = top = last = cur_top = m_NumSegs - 1;    bool plus = IsPositiveStrand(row);    // if out-of-range, return either -1 or the closest seg in dir direction    if (sseq_pos < GetSeqStart(row)) {        if (dir == eNone) {            return -1;        } else if (dir == eForward  ||                    dir == (plus ? eRight : eLeft)  ||                   try_reverse_dir) {            TNumseg seg;            if (plus) {                seg = -1;                while (++seg < m_NumSegs) {                    if (m_Starts[seg * m_NumRows + row] >= 0) {                        return seg;                    }                }            } else {                seg = m_NumSegs;                while (seg--) {                    if (m_Starts[seg * m_NumRows + row] >= 0) {                        return seg;                    }                }            }        }    } else if (sseq_pos > GetSeqStop(row)) {        if (dir == eNone) {            return -1;        } else if (dir == eBackwards  ||                   dir == (plus ? eLeft : eRight)  ||                   try_reverse_dir) {            TNumseg seg;            if (plus) {                seg = m_NumSegs;                while (seg--) {                    if (m_Starts[seg * m_NumRows + row] >= 0) {                        return seg;                    }                }            } else {                seg = -1;                while (++seg < m_NumSegs) {                    if (m_Starts[seg * m_NumRows + row] >= 0) {                        return seg;                    }                }            }        }    }    // main loop    while (btm <= top) {        cur = mid = (top + btm) / 2;        while (cur <= top               &&  (start = m_Starts[(plus ? cur : last - cur)                                              * m_NumRows + row]) < 0) {            ++cur;        }        if (cur <= top && start >= 0) {            if (sseq_pos >= start &&                seq_pos < start + x_GetLen(row, plus ? cur : last - cur)) {                return (plus ? cur : last - cur); // found            }            if (sseq_pos > start) {                btm = cur + 1;                cur_btm = cur;            } else {                top = mid - 1;                cur_top = cur;            }            continue;        }        cur = mid-1;        while (cur >= btm &&               (start = m_Starts[(plus ? cur : last - cur)                                         * m_NumRows + row]) < 0) {            --cur;        }        if (cur >= btm && start >= 0) {            if (sseq_pos >= start                &&  seq_pos < start + x_GetLen(row, plus ? cur : last - cur)) {                return (plus ? cur : last - cur); // found            }            if (sseq_pos > start) {                btm = mid + 1;                cur_btm = cur;            } else {                top = cur - 1;                cur_top = cur;            }            continue;        }        // if we get here, seq_pos falls into an unaligned region        // return either -1 or the closest segment in dir direction        if (dir == eNone) {            return -1;        } else if (dir == eBackwards  ||  dir == (plus ? eLeft : eRight)) {            return (plus ? cur_btm : last - cur_btm);        } else if (dir == eForward  ||  dir == (plus ? eRight : eLeft)) {            return (plus ? cur_top : last - cur_top);        }    }            return -1; /* No match found */}    TSignedSeqPos CAlnMap::GetAlnPosFromSeqPos(TNumrow row, TSeqPos seq_pos,                                           ESearchDirection dir,                                           bool try_reverse_dir) const{    TNumseg raw_seg = GetRawSeg(row, seq_pos, dir, try_reverse_dir);    if (raw_seg < 0) { // out of seq range        return -1;    }    TSeqPos start = m_Starts[raw_seg * m_NumRows + row];    TSeqPos len   = x_GetLen(row, raw_seg);    TSeqPos stop  = start + len -1;    bool    plus  = IsPositiveStrand(row);    CNumSegWithOffset seg = x_GetSegFromRawSeg(raw_seg);    if (dir == eNone) {        if (seg.GetOffset()) {            // seq_pos is within an insert            return -1;        }     } else {        // check if within unaligned region        // if seq_pos outside the segment returned by GetRawSeg        // then return the edge alnpos        if ((plus ? seq_pos < start : seq_pos > stop)) {            return GetAlnStart(seg.GetAlnSeg());        }        if ((plus ? seq_pos > stop : seq_pos < start)) {            return GetAlnStop(seg.GetAlnSeg());        }        // check if within an insert        if (seg.GetOffset()  &&              (dir == eRight  ||             dir == (plus ? eForward : eBackwards))) {            // seek the nearest alnpos on the right            if (seg.GetAlnSeg() < GetNumSegs() - 1) {                return GetAlnStart(seg.GetAlnSeg() + 1);            } else if (try_reverse_dir) {                return GetAlnStop(seg.GetAlnSeg());            } else {                return -1;            }        }        if (seg.GetOffset()  &&            (dir == eLeft  ||             dir == (plus ? eBackwards : eForward))) {                        // seek the nearest alnpos on left            if (seg.GetAlnSeg() >= 0) {                return GetAlnStop(seg.GetAlnSeg());            } else if (try_reverse_dir) {                return GetAlnStart(seg.GetAlnSeg() + 1);            } else {                return -1;            }        }    }     // main case: seq_pos is within an alnseg    //assert(seq_pos >= start  &&  seq_pos <= stop);    TSeqPos delta = (seq_pos - start) / GetWidth(row);    return m_AlnStarts[seg.GetAlnSeg()]        + (plus ? delta : m_Lens[raw_seg] - 1 - delta);}TSignedSeqPos CAlnMap::GetSeqPosFromAlnPos(TNumrow for_row,                                           TSeqPos aln_pos,                                           ESearchDirection dir,                                           bool try_reverse_dir) const{    if (aln_pos > GetAlnStop()) {        aln_pos = GetAlnStop(); // out-of-range adjustment

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?