alnmap.cpp
来自「ncbi源码」· C++ 代码 · 共 1,302 行 · 第 1/3 页
CPP
1,302 行
} TNumseg seg = GetSeg(aln_pos); TSignedSeqPos pos = GetStart(for_row, seg); if (pos >= 0) { TSeqPos delta = (aln_pos - GetAlnStart(seg)) * GetWidth(for_row); if (IsPositiveStrand(for_row)) { pos += delta; } else { pos += x_GetLen(for_row, x_GetRawSegFromSeg(seg)) - 1 - delta; } } else if (dir != eNone) { // it is a gap, search in the neighbouring segments // according to search direction (dir) and strand bool reverse_pass = false; TNumseg orig_seg = seg = x_GetRawSegFromSeg(seg); while (true) { if (IsPositiveStrand(for_row)) { if (dir == eBackwards || dir == eLeft) { while (--seg >=0 && pos == -1) { pos = x_GetRawStop(for_row, seg); } } else { while (++seg < m_NumSegs && pos == -1) { pos = x_GetRawStart(for_row, seg); } } } else { if (dir == eForward || dir == eLeft) { while (--seg >=0 && pos == -1) { pos = x_GetRawStart(for_row, seg); } } else { while (++seg < m_NumSegs && pos == -1) { pos = x_GetRawStop(for_row, seg); } } } if (!try_reverse_dir) { break; } if (pos >= 0) { break; // found } else if (reverse_pass) { string msg = "CAlnVec::GetSeqPosFromAlnPos(): " "Invalid Dense-seg: Row " + NStr::IntToString(for_row) + " contains gaps only."; NCBI_THROW(CAlnException, eInvalidDenseg, msg); } // not found, try reverse direction reverse_pass = true; seg = orig_seg; switch (dir) { case eLeft: dir = eRight; break; case eRight: dir = eLeft; break; case eForward: dir = eBackwards; break; case eBackwards: dir = eForward; break; } } } return pos;}TSignedSeqPos CAlnMap::GetSeqPosFromSeqPos(TNumrow for_row, TNumrow row, TSeqPos seq_pos) const{ TNumseg raw_seg = GetRawSeg(row, seq_pos); if (raw_seg < 0) { return -1; } unsigned offset = raw_seg * m_NumRows; TSignedSeqPos start = m_Starts[offset + for_row]; if (start >= 0) { TSeqPos delta = seq_pos - m_Starts[offset + row]; if (GetWidth(for_row) != GetWidth(row)) { delta = delta / GetWidth(row) * GetWidth(for_row); } return start + (StrandSign(row) == StrandSign(for_row) ? delta : x_GetLen(for_row, raw_seg) - 1 - delta); } else { return -1; }}const CAlnMap::TNumseg& CAlnMap::x_GetSeqLeftSeg(TNumrow row) const{ TNumseg& seg = m_SeqLeftSegs[row]; if (seg < 0) { while (++seg < m_NumSegs) { if (m_Starts[seg * m_NumRows + row] >= 0) { return seg; } } } else { return seg; } seg = -1; string err_msg = "CAlnVec::x_GetSeqLeftSeg(): " "Invalid Dense-seg: Row " + NStr::IntToString(row) + " contains gaps only."; NCBI_THROW(CAlnException, eInvalidDenseg, err_msg);} const CAlnMap::TNumseg& CAlnMap::x_GetSeqRightSeg(TNumrow row) const{ TNumseg& seg = m_SeqRightSegs[row]; if (seg < 0) { seg = m_NumSegs; while (seg--) { if (m_Starts[seg * m_NumRows + row] >= 0) { return seg; } } } else { return seg; } seg = -1; string err_msg = "CAlnVec::x_GetSeqRightSeg(): " "Invalid Dense-seg: Row " + NStr::IntToString(row) + " contains gaps only."; NCBI_THROW(CAlnException, eInvalidDenseg, err_msg);} void CAlnMap::GetResidueIndexMap(TNumrow row0, TNumrow row1, TRange aln_rng, vector<TSignedSeqPos>& result, TRange& rng0, TRange& rng1) const{ _ASSERT( ! IsSetAnchor() ); TNumseg l_seg, r_seg; TSeqPos aln_start = aln_rng.GetFrom(); TSeqPos aln_stop = aln_rng.GetTo(); int l_idx0 = row0; int l_idx1 = row1; TSeqPos aln_pos = 0, next_aln_pos, l_len, r_len, l_delta, r_delta; bool plus0 = IsPositiveStrand(row0); bool plus1 = IsPositiveStrand(row1); TSeqPos l_pos0, r_pos0, l_pos1, r_pos1; l_seg = 0; while (l_seg < m_NumSegs) { l_len = m_Lens[l_seg]; next_aln_pos = aln_pos + l_len; if (m_Starts[l_idx0] >= 0 && m_Starts[l_idx1] >= 0 && aln_start >= aln_pos && aln_start < next_aln_pos) { // found the left seg break; } aln_pos = next_aln_pos; l_idx0 += m_NumRows; l_idx1 += m_NumRows; l_seg++; } _ASSERT(l_seg < m_NumSegs); // determine left seq positions l_pos0 = m_Starts[l_idx0]; l_pos1 = m_Starts[l_idx1]; l_delta = aln_start - aln_pos; l_len -= l_delta; _ASSERT(l_delta >= 0); if (plus0) { l_pos0 += l_delta; } else { l_pos0 += l_len - 1; } if (plus1) { l_pos1 += l_delta; } else { l_pos1 += l_len - 1; } r_seg = m_NumSegs - 1; int r_idx0 = r_seg * m_NumRows + row0; int r_idx1 = r_seg * m_NumRows + row1; aln_pos = GetAlnStop(); if (aln_stop > aln_pos) { aln_stop = aln_pos; } while (r_seg >= 0) { r_len = m_Lens[r_seg]; next_aln_pos = aln_pos - r_len; if (m_Starts[l_idx0] >= 0 && m_Starts[l_idx1] >= 0 && aln_stop > next_aln_pos && aln_stop <= aln_pos) { // found the right seg break; } aln_pos = next_aln_pos; r_idx0 -= m_NumRows; r_idx1 -= m_NumRows; r_seg--; } // determine right seq positions r_pos0 = m_Starts[r_idx0]; r_pos1 = m_Starts[r_idx1]; r_delta = aln_pos - aln_stop; r_len -= r_delta; _ASSERT(r_delta >= 0); if (plus0) { r_pos0 += r_len - 1; } else { r_pos0 += r_delta; } if (plus1) { r_pos1 += r_len - 1; } else { r_pos1 += r_delta; } // We now know the size of the resulting vector TSeqPos size = (plus0 ? r_pos0 - l_pos0 : l_pos0 - r_pos0) + 1; result.resize(size, -1); // Initialize index positions (from left to right) TSeqPos pos0 = plus0 ? 0 : l_pos0 - r_pos0; TSeqPos pos1 = plus1 ? 0 : l_pos1 - r_pos1; // Initialize 'next' positions // -- to determine if there are unaligned pieces TSeqPos next_l_pos0 = plus0 ? l_pos0 + l_len : l_pos0 - l_len; TSeqPos next_l_pos1 = plus1 ? l_pos1 + l_len : l_pos1 - l_len; l_idx0 = row0; l_idx1 = row1; TNumseg seg = l_seg; TSeqPos delta; while (true) { if (m_Starts[l_idx0] >= 0) { // if row0 is not gapped if (seg > l_seg) { // check for unaligned region / validate if (plus0) { delta = m_Starts[l_idx0] - next_l_pos0; next_l_pos0 = m_Starts[l_idx0] + l_len; } else { delta = next_l_pos0 - m_Starts[l_idx0] - l_len + 1; next_l_pos0 = m_Starts[l_idx0] - 1; } if (delta > 0) { // unaligned region if (plus0) { pos0 += delta; } else { pos0 -= delta; } } else if (delta < 0) { // invalid segment string errstr = string("CAlnMap::GetResidueIndexMap():") + " Starts are not consistent!" + " Row=" + NStr::IntToString(row0) + " Seg=" + NStr::IntToString(seg); NCBI_THROW(CAlnException, eInvalidDenseg, errstr); } } if (m_Starts[l_idx1] >= 0) { // if row1 is not gapped if (seg > l_seg) { // check for unaligned region / validate if (plus1) { delta = m_Starts[l_idx1] - next_l_pos1; next_l_pos1 = m_Starts[l_idx1] + l_len; } else { delta = next_l_pos1 - m_Starts[l_idx1] - l_len + 1; next_l_pos1 = m_Starts[l_idx1] - 1; } if (delta > 0) { // unaligned region if (plus1) { pos1 += delta; } else { pos1 -= delta; } } else if (delta < 0) { // invalid segment string errstr = string("CAlnMap::GetResidueIndexMap():") + " Starts are not consistent!" + " Row=" + NStr::IntToString(row1) + " Seg=" + NStr::IntToString(seg); NCBI_THROW(CAlnException, eInvalidDenseg, errstr); } } if (plus0) { if (plus1) { // if row1 on + while (l_len--) { result[pos0++] = pos1++; } } else { // if row1 on - while (l_len--) { result[pos0++] = pos1--; } } } else { // if row0 on - if (plus1) { // if row1 on + while (l_len--) { result[pos0--] = pos1++; } } else { // if row1 on - while (l_len--) { result[pos0--] = pos1--; } } } } else { if (plus0) { pos0 += l_len; } else { pos0 -= l_len; } } } // iterate to next segment seg++; l_idx0 += m_NumRows; l_idx1 += m_NumRows; if (seg < r_seg) { l_len = m_Lens[seg]; } else if (seg == r_seg) { l_len = r_len; } else { break; } } // finally, set the ranges for the two sequences rng0.SetFrom(plus0 ? l_pos0 : r_pos0); rng0.SetTo(plus0 ? r_pos0 : l_pos0); rng1.SetFrom(plus1 ? l_pos1 : r_pos1); rng1.SetTo(plus1 ? r_pos1 : l_pos1);}TSignedSeqPos CAlnMap::GetSeqAlnStart(TNumrow row) const{ if (IsSetAnchor()) { TNumseg seg = -1; while (++seg < m_AlnSegIdx.size()) { if (m_Starts[m_AlnSegIdx[seg] * m_NumRows + row] >= 0) { return GetAlnStart(seg); } } return -1; } else { return GetAlnStart(x_GetSeqLeftSeg(row)); }}TSignedSeqPos CAlnMap::GetSeqAlnStop(TNumrow row) const{ if (IsSetAnchor()) { TNumseg seg = m_AlnSegIdx.size(); while (seg--) { if (m_Starts[m_AlnSegIdx[seg] * m_NumRows + row] >= 0) { return GetAlnStop(seg); } } return -1; } else { return GetAlnStop(x_GetSeqRightSeg(row)); }}CRef<CAlnMap::CAlnChunkVec>CAlnMap::GetAlnChunks(TNumrow row, const TSignedRange& range, TGetChunkFlags flags) const{ CRef<CAlnChunkVec> vec(new CAlnChunkVec(*this, row)); // boundaries check if (range.GetTo() < 0 || range.GetFrom() > (TSignedSeqPos) GetAlnStop(GetNumSegs() - 1)) { return vec; } // determine the participating segments range TNumseg first_seg, last_seg, aln_seg; if (range.GetFrom() < 0) { first_seg = 0; } else { first_seg = x_GetRawSegFromSeg(aln_seg = GetSeg(range.GetFrom())); if ( !(flags & fDoNotTruncateSegs) ) { vec->m_LeftDelta = range.GetFrom() - GetAlnStart(aln_seg); } } if ((TSeqPos)range.GetTo() > GetAlnStop(GetNumSegs()-1)) { last_seg = m_NumSegs-1; } else { last_seg = x_GetRawSegFromSeg(aln_seg = GetSeg(range.GetTo())); if ( !(flags & fDoNotTruncateSegs) ) { vec->m_RightDelta = GetAlnStop(aln_seg) - range.GetTo(); } } x_GetChunks(vec, row, first_seg, last_seg, flags); return vec;}CRef<CAlnMap::CAlnChunkVec>CAlnMap::GetSeqChunks(TNumrow row, const TSignedRange& range, TGetChunkFlags flags) const{ CRef<CAlnChunkVec> vec(new CAlnChunkVec(*this, row)); // boundaries check if (range.GetTo() < GetSeqStart(row) || range.GetFrom() > GetSeqStop(row)) { return vec; } // determine the participating segments range TNumseg first_seg, last_seg; if (range.GetFrom() < GetSeqStart(row)) { if (IsPositiveStrand(row)) { first_seg = 0; } else { last_seg = m_NumSegs - 1;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?