alnmap.cpp

来自「ncbi源码」· C++ 代码 · 共 1,302 行 · 第 1/3 页

CPP
1,302
字号
    }    TNumseg seg = GetSeg(aln_pos);    TSignedSeqPos pos = GetStart(for_row, seg);    if (pos >= 0) {        TSeqPos delta = (aln_pos - GetAlnStart(seg)) * GetWidth(for_row);        if (IsPositiveStrand(for_row)) {            pos += delta;        } else {            pos += x_GetLen(for_row, x_GetRawSegFromSeg(seg)) - 1 - delta;        }    } else if (dir != eNone) {        // it is a gap, search in the neighbouring segments        // according to search direction (dir) and strand        bool reverse_pass = false;        TNumseg orig_seg = seg = x_GetRawSegFromSeg(seg);                    while (true) {            if (IsPositiveStrand(for_row)) {                if (dir == eBackwards  ||  dir == eLeft) {                    while (--seg >=0  &&  pos == -1) {                        pos = x_GetRawStop(for_row, seg);                    }                } else {                    while (++seg < m_NumSegs  &&  pos == -1) {                        pos = x_GetRawStart(for_row, seg);                    }                }            } else {                if (dir == eForward  ||  dir == eLeft) {                    while (--seg >=0  &&  pos == -1) {                        pos = x_GetRawStart(for_row, seg);                    }                } else {                    while (++seg < m_NumSegs  &&  pos == -1) {                        pos = x_GetRawStop(for_row, seg);                    }                 }            }            if (!try_reverse_dir) {                break;            }            if (pos >= 0) {                break; // found            } else if (reverse_pass) {                string msg = "CAlnVec::GetSeqPosFromAlnPos(): "                    "Invalid Dense-seg: Row " +                    NStr::IntToString(for_row) +                    " contains gaps only.";                NCBI_THROW(CAlnException, eInvalidDenseg, msg);            }            // not found, try reverse direction            reverse_pass = true;            seg = orig_seg;            switch (dir) {            case eLeft:                dir = eRight; break;            case eRight:                dir = eLeft; break;            case eForward:                dir = eBackwards; break;            case eBackwards:                dir = eForward; break;            }        }    }    return pos;}TSignedSeqPos CAlnMap::GetSeqPosFromSeqPos(TNumrow for_row,                                           TNumrow row, TSeqPos seq_pos) const{    TNumseg raw_seg = GetRawSeg(row, seq_pos);    if (raw_seg < 0) {        return -1;    }    unsigned offset = raw_seg * m_NumRows;    TSignedSeqPos start = m_Starts[offset + for_row];    if (start >= 0) {        TSeqPos delta            = seq_pos - m_Starts[offset + row];        if (GetWidth(for_row) != GetWidth(row)) {            delta = delta / GetWidth(row) * GetWidth(for_row);        }        return start            + (StrandSign(row) == StrandSign(for_row) ? delta               : x_GetLen(for_row, raw_seg) - 1 - delta);    } else {        return -1;    }}const CAlnMap::TNumseg& CAlnMap::x_GetSeqLeftSeg(TNumrow row) const{    TNumseg& seg = m_SeqLeftSegs[row];    if (seg < 0) {        while (++seg < m_NumSegs) {            if (m_Starts[seg * m_NumRows + row] >= 0) {                return seg;            }        }    } else {        return seg;    }    seg = -1;    string err_msg = "CAlnVec::x_GetSeqLeftSeg(): "        "Invalid Dense-seg: Row " + NStr::IntToString(row) +        " contains gaps only.";    NCBI_THROW(CAlnException, eInvalidDenseg, err_msg);}    const CAlnMap::TNumseg& CAlnMap::x_GetSeqRightSeg(TNumrow row) const{    TNumseg& seg = m_SeqRightSegs[row];    if (seg < 0) {        seg = m_NumSegs;        while (seg--) {            if (m_Starts[seg * m_NumRows + row] >= 0) {                return seg;            }        }    } else {        return seg;    }    seg = -1;    string err_msg = "CAlnVec::x_GetSeqRightSeg(): "        "Invalid Dense-seg: Row " + NStr::IntToString(row) +        " contains gaps only.";    NCBI_THROW(CAlnException, eInvalidDenseg, err_msg);}    void CAlnMap::GetResidueIndexMap(TNumrow row0,                                 TNumrow row1,                                 TRange aln_rng,                                 vector<TSignedSeqPos>& result,                                 TRange& rng0,                                 TRange& rng1) const{    _ASSERT( ! IsSetAnchor() );    TNumseg l_seg, r_seg;    TSeqPos aln_start = aln_rng.GetFrom();    TSeqPos aln_stop = aln_rng.GetTo();    int l_idx0 = row0;    int l_idx1 = row1;    TSeqPos aln_pos = 0, next_aln_pos, l_len, r_len, l_delta, r_delta;    bool plus0 = IsPositiveStrand(row0);    bool plus1 = IsPositiveStrand(row1);    TSeqPos l_pos0, r_pos0, l_pos1, r_pos1;    l_seg = 0;    while (l_seg < m_NumSegs) {        l_len = m_Lens[l_seg];        next_aln_pos = aln_pos + l_len;        if (m_Starts[l_idx0] >= 0  &&  m_Starts[l_idx1] >= 0  &&            aln_start >= aln_pos  &&  aln_start < next_aln_pos) {            // found the left seg            break;        }        aln_pos = next_aln_pos;        l_idx0 += m_NumRows; l_idx1 += m_NumRows;        l_seg++;    }    _ASSERT(l_seg < m_NumSegs);    // determine left seq positions    l_pos0 = m_Starts[l_idx0];    l_pos1 = m_Starts[l_idx1];    l_delta = aln_start - aln_pos;    l_len -= l_delta;    _ASSERT(l_delta >= 0);    if (plus0) {        l_pos0 += l_delta;    } else {        l_pos0 += l_len - 1;    }    if (plus1) {        l_pos1 += l_delta;    } else {        l_pos1 += l_len - 1;    }            r_seg = m_NumSegs - 1;    int r_idx0 = r_seg * m_NumRows + row0;    int r_idx1 = r_seg * m_NumRows + row1;    aln_pos = GetAlnStop();    if (aln_stop > aln_pos) {        aln_stop = aln_pos;    }    while (r_seg >= 0) {        r_len = m_Lens[r_seg];        next_aln_pos = aln_pos - r_len;        if (m_Starts[l_idx0] >= 0  &&  m_Starts[l_idx1] >= 0  &&            aln_stop > next_aln_pos  &&  aln_stop <= aln_pos) {            // found the right seg            break;        }        aln_pos = next_aln_pos;        r_idx0 -= m_NumRows; r_idx1 -= m_NumRows;        r_seg--;    }        // determine right seq positions    r_pos0 = m_Starts[r_idx0];    r_pos1 = m_Starts[r_idx1];    r_delta = aln_pos - aln_stop;    r_len -= r_delta;    _ASSERT(r_delta >= 0);    if (plus0) {        r_pos0 += r_len - 1;    } else {        r_pos0 += r_delta;    }    if (plus1) {        r_pos1 += r_len - 1;    } else {        r_pos1 += r_delta;    }            // We now know the size of the resulting vector    TSeqPos size = (plus0 ? r_pos0 - l_pos0 : l_pos0 - r_pos0) + 1;    result.resize(size, -1);    // Initialize index positions (from left to right)    TSeqPos pos0 = plus0 ? 0 : l_pos0 - r_pos0;    TSeqPos pos1 = plus1 ? 0 : l_pos1 - r_pos1;    // Initialize 'next' positions    // -- to determine if there are unaligned pieces    TSeqPos next_l_pos0 = plus0 ? l_pos0 + l_len : l_pos0 - l_len;    TSeqPos next_l_pos1 = plus1 ? l_pos1 + l_len : l_pos1 - l_len;    l_idx0 = row0;    l_idx1 = row1;    TNumseg seg = l_seg;    TSeqPos delta;    while (true) {        if (m_Starts[l_idx0] >= 0) { // if row0 is not gapped            if (seg > l_seg) {                // check for unaligned region / validate                if (plus0) {                    delta = m_Starts[l_idx0] - next_l_pos0;                    next_l_pos0 = m_Starts[l_idx0] + l_len;                } else {                    delta = next_l_pos0 - m_Starts[l_idx0] - l_len + 1;                    next_l_pos0 = m_Starts[l_idx0] - 1;                }                if (delta > 0) {                    // unaligned region                    if (plus0) {                        pos0 += delta;                    } else {                        pos0 -= delta;                    }                } else if (delta < 0) {                    // invalid segment                    string errstr = string("CAlnMap::GetResidueIndexMap():")                        + " Starts are not consistent!"                        + " Row=" + NStr::IntToString(row0) +                        " Seg=" + NStr::IntToString(seg);                    NCBI_THROW(CAlnException, eInvalidDenseg, errstr);                }            }            if (m_Starts[l_idx1] >= 0) { // if row1 is not gapped                                if (seg > l_seg) {                    // check for unaligned region / validate                    if (plus1) {                        delta = m_Starts[l_idx1] - next_l_pos1;                        next_l_pos1 = m_Starts[l_idx1] + l_len;                    } else {                        delta = next_l_pos1 - m_Starts[l_idx1] - l_len + 1;                        next_l_pos1 = m_Starts[l_idx1] - 1;                    }                    if (delta > 0) {                        // unaligned region                        if (plus1) {                            pos1 += delta;                        } else {                            pos1 -= delta;                        }                    } else if (delta < 0) {                        // invalid segment                        string errstr = string("CAlnMap::GetResidueIndexMap():")                            + " Starts are not consistent!"                            + " Row=" + NStr::IntToString(row1) +                            " Seg=" + NStr::IntToString(seg);                        NCBI_THROW(CAlnException, eInvalidDenseg, errstr);                    }                }                if (plus0) {                    if (plus1) { // if row1 on +                        while (l_len--) {                            result[pos0++] = pos1++;                        }                    } else { // if row1 on -                        while (l_len--) {                            result[pos0++] = pos1--;                        }                    }                } else { // if row0 on -                    if (plus1) { // if row1 on +                        while (l_len--) {                            result[pos0--] = pos1++;                        }                    } else { // if row1 on -                        while (l_len--) {                            result[pos0--] = pos1--;                        }                    }                }                                } else {                if (plus0) {                    pos0 += l_len;                } else {                    pos0 -= l_len;                }            }        }        // iterate to next segment        seg++;        l_idx0 += m_NumRows;        l_idx1 += m_NumRows;        if (seg < r_seg) {            l_len = m_Lens[seg];        } else if (seg == r_seg) {            l_len = r_len;        } else {            break;        }    }    // finally, set the ranges for the two sequences    rng0.SetFrom(plus0 ? l_pos0 : r_pos0);    rng0.SetTo(plus0 ? r_pos0 : l_pos0);    rng1.SetFrom(plus1 ? l_pos1 : r_pos1);    rng1.SetTo(plus1 ? r_pos1 : l_pos1);}TSignedSeqPos CAlnMap::GetSeqAlnStart(TNumrow row) const{    if (IsSetAnchor()) {        TNumseg seg = -1;        while (++seg < m_AlnSegIdx.size()) {            if (m_Starts[m_AlnSegIdx[seg] * m_NumRows + row] >= 0) {                return GetAlnStart(seg);            }        }        return -1;    } else {        return GetAlnStart(x_GetSeqLeftSeg(row));    }}TSignedSeqPos CAlnMap::GetSeqAlnStop(TNumrow row) const{    if (IsSetAnchor()) {        TNumseg seg = m_AlnSegIdx.size();        while (seg--) {            if (m_Starts[m_AlnSegIdx[seg] * m_NumRows + row] >= 0) {                return GetAlnStop(seg);            }        }        return -1;    } else {        return GetAlnStop(x_GetSeqRightSeg(row));    }}CRef<CAlnMap::CAlnChunkVec>CAlnMap::GetAlnChunks(TNumrow row, const TSignedRange& range,                      TGetChunkFlags flags) const{    CRef<CAlnChunkVec> vec(new CAlnChunkVec(*this, row));    // boundaries check    if (range.GetTo() < 0        ||  range.GetFrom() > (TSignedSeqPos) GetAlnStop(GetNumSegs() - 1)) {        return vec;    }    // determine the participating segments range    TNumseg first_seg, last_seg, aln_seg;    if (range.GetFrom() < 0) {        first_seg = 0;    } else {                first_seg = x_GetRawSegFromSeg(aln_seg = GetSeg(range.GetFrom()));        if ( !(flags & fDoNotTruncateSegs) ) {            vec->m_LeftDelta = range.GetFrom() - GetAlnStart(aln_seg);        }    }    if ((TSeqPos)range.GetTo() > GetAlnStop(GetNumSegs()-1)) {        last_seg = m_NumSegs-1;    } else {        last_seg = x_GetRawSegFromSeg(aln_seg = GetSeg(range.GetTo()));        if ( !(flags & fDoNotTruncateSegs) ) {            vec->m_RightDelta = GetAlnStop(aln_seg) - range.GetTo();        }    }        x_GetChunks(vec, row, first_seg, last_seg, flags);    return vec;}CRef<CAlnMap::CAlnChunkVec>CAlnMap::GetSeqChunks(TNumrow row, const TSignedRange& range,                      TGetChunkFlags flags) const{    CRef<CAlnChunkVec> vec(new CAlnChunkVec(*this, row));    // boundaries check    if (range.GetTo() < GetSeqStart(row)  ||        range.GetFrom() > GetSeqStop(row)) {        return vec;    }    // determine the participating segments range    TNumseg first_seg, last_seg;    if (range.GetFrom() < GetSeqStart(row)) {        if (IsPositiveStrand(row)) {            first_seg = 0;        } else {            last_seg = m_NumSegs - 1;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?