seq_align_mapper.cpp
来自「ncbi源码」· C++ 代码 · 共 1,238 行 · 第 1/3 页
CPP
1,238 行
} // Leftmost first if (x->m_Src_from != y->m_Src_from) { return x->m_Src_from < y->m_Src_from; } // Longest first return x->m_Src_to > y->m_Src_to;}void CSeq_align_Mapper::Convert(CSeq_loc_Conversion_Set& cvts, unsigned int loc_index_shift){ m_DstAlign.Reset(); if (m_SubAligns.size() > 0) { NON_CONST_ITERATE(TSubAligns, it, m_SubAligns) { it->Convert(cvts, loc_index_shift); loc_index_shift += it->m_Dim; } return; } x_ConvertAlign(cvts, loc_index_shift);}void CSeq_align_Mapper::x_ConvertAlign(CSeq_loc_Conversion_Set& cvts, unsigned int loc_index_shift){ if (cvts.m_CvtByIndex.size() == 0) { // Single mapping _ASSERT(cvts.m_SingleConv); x_ConvertRow(*cvts.m_SingleConv, cvts.m_SingleIndex - loc_index_shift); return; } CSeq_loc_Conversion_Set::TConvByIndex::iterator idx_it = cvts.m_CvtByIndex.lower_bound(loc_index_shift); for ( ; idx_it != cvts.m_CvtByIndex.end() && idx_it->first < loc_index_shift + m_Dim; ++idx_it) { x_ConvertRow(idx_it->second, idx_it->first - loc_index_shift); }}void CSeq_align_Mapper::x_ConvertRow(CSeq_loc_Conversion& cvt, size_t row){ CSeq_id_Handle dst_id; TSegments::iterator seg_it = m_Segs.begin(); for ( ; seg_it != m_Segs.end(); ) { if (seg_it->m_Rows.size() <= row) { // No such row in the current segment ++seg_it; m_AlignFlags = eAlign_MultiDim; continue; } CSeq_id_Handle seg_id = x_ConvertSegment(seg_it, cvt, row); if (dst_id) { if (dst_id != seg_id && m_AlignFlags == eAlign_Normal) { m_AlignFlags = eAlign_MultiId; } dst_id = seg_id; } }}void CSeq_align_Mapper::x_ConvertRow(TIdMap& cvts, size_t row){ CSeq_id_Handle dst_id; TSegments::iterator seg_it = m_Segs.begin(); for ( ; seg_it != m_Segs.end(); ) { if (seg_it->m_Rows.size() <= row) { // No such row in the current segment ++seg_it; m_AlignFlags = eAlign_MultiDim; continue; } CSeq_id_Handle seg_id = x_ConvertSegment(seg_it, cvts, row); if (dst_id) { if (dst_id != seg_id && m_AlignFlags == eAlign_Normal) { m_AlignFlags = eAlign_MultiId; } dst_id = seg_id; } }}CSeq_id_HandleCSeq_align_Mapper::x_ConvertSegment(TSegments::iterator& seg_it, CSeq_loc_Conversion& cvt, size_t row){ TSegments::iterator old_it = seg_it; ++seg_it; SAlignment_Segment::SAlignment_Row& aln_row = old_it->m_Rows[row]; if (aln_row.m_Id != cvt.m_Src_id_Handle) { return aln_row.m_Id; } if (aln_row.m_Start == kInvalidSeqPos) { // ??? Skipped row - change ID aln_row.m_Id = cvt.m_Dst_id_Handle; aln_row.SetMapped(); return aln_row.m_Id; } TRange rg(aln_row.m_Start, aln_row.m_Start + old_it->m_Len - 1); if (!cvt.ConvertInterval(rg.GetFrom(), rg.GetTo(), aln_row.m_Strand) ) { // Do not erase the segment, just change the row ID and reset start aln_row.m_Start = kInvalidSeqPos; aln_row.m_Id = cvt.m_Dst_id_Handle; aln_row.SetMapped(); return aln_row.m_Id; } // At least part of the interval was converted. TSeqPos dl = cvt.m_Src_from <= rg.GetFrom() ? 0 : cvt.m_Src_from - rg.GetFrom(); TSeqPos dr = cvt.m_Src_to >= rg.GetTo() ? 0 : rg.GetTo() - cvt.m_Src_to; if (dl > 0) { // Add segment for the skipped range SAlignment_Segment& lseg = x_InsertSeg(seg_it, dl, old_it->m_Rows.size()); for (size_t r = 0; r < old_it->m_Rows.size(); ++r) { SAlignment_Segment::SAlignment_Row& lrow = lseg.CopyRow(r, old_it->m_Rows[r]); if (r == row) { lrow.m_Start = kInvalidSeqPos; lrow.m_Id = cvt.m_Dst_id_Handle; } else if (lrow.m_Start != kInvalidSeqPos && !lrow.SameStrand(aln_row)) { // Adjust start for minus strand lrow.m_Start += old_it->m_Len - lseg.m_Len; } } } rg.SetFrom(rg.GetFrom() + dl); SAlignment_Segment& mseg = x_InsertSeg(seg_it, rg.GetLength() - dr, old_it->m_Rows.size()); if (!dl && !dr) { // copy scores if there's no truncation mseg.SetScores(old_it->m_Scores); } for (size_t r = 0; r < old_it->m_Rows.size(); ++r) { SAlignment_Segment::SAlignment_Row& mrow = mseg.CopyRow(r, old_it->m_Rows[r]); if (r == row) { // translate id and coords mrow.m_Id = cvt.m_Dst_id_Handle; mrow.m_Start = cvt.m_LastRange.GetFrom(); mrow.m_IsSetStrand |= (cvt.m_LastStrand != eNa_strand_unknown); mrow.m_Strand = cvt.m_LastStrand; mrow.SetMapped(); mseg.m_HaveStrands |= mrow.m_IsSetStrand; } else { if (mrow.m_Start != kInvalidSeqPos) { if (mrow.SameStrand(aln_row)) { mrow.m_Start += dl; } else { mrow.m_Start += old_it->m_Len - dl - mseg.m_Len; } } } } cvt.m_LastType = cvt.eMappedObjType_not_set; dl += rg.GetLength() - dr; rg.SetFrom(rg.GetTo() - dr); if (dr > 0) { // Add the remaining unmapped range SAlignment_Segment& rseg = x_InsertSeg(seg_it, dr, old_it->m_Rows.size()); for (size_t r = 0; r < old_it->m_Rows.size(); ++r) { SAlignment_Segment::SAlignment_Row& rrow = rseg.CopyRow(r, old_it->m_Rows[r]); if (r == row) { rrow.m_Start = kInvalidSeqPos; rrow.m_Id = cvt.m_Dst_id_Handle; } else { if (rrow.SameStrand(aln_row)) { rrow.m_Start += dl; } } } } m_Segs.erase(old_it); return cvt.m_Dst_id_Handle;}CSeq_id_HandleCSeq_align_Mapper::x_ConvertSegment(TSegments::iterator& seg_it, TIdMap& id_map, size_t row){ TSegments::iterator old_it = seg_it; SAlignment_Segment& seg = *old_it; ++seg_it; SAlignment_Segment::SAlignment_Row& aln_row = seg.m_Rows[row]; if (aln_row.m_Start == kInvalidSeqPos) { // skipped row return aln_row.m_Id; } TRange rg(aln_row.m_Start, aln_row.m_Start + seg.m_Len - 1); TIdMap::iterator id_it = id_map.find(aln_row.m_Id); if (id_it == id_map.end()) { // ID not found in the segment, leave the row unchanged return aln_row.m_Id; } TRangeMap& rmap = id_it->second; if ( rmap.empty() ) { // No mappings for this segment return aln_row.m_Id; } // Sorted mappings typedef vector< CRef<CSeq_loc_Conversion> > TSortedConversions; TSortedConversions cvts; for (TRangeMap::iterator rg_it = rmap.begin(rg); rg_it; ++rg_it) { cvts.push_back(rg_it->second); } sort(cvts.begin(), cvts.end(), CConversionRef_Less()); bool mapped = false; CSeq_id_Handle dst_id; EAlignFlags align_flags = eAlign_Normal; TSeqPos left_shift = 0; for (size_t cvt_idx = 0; cvt_idx < cvts.size(); ++cvt_idx) { CSeq_loc_Conversion& cvt = *cvts[cvt_idx]; if (!cvt.ConvertInterval(rg.GetFrom(), rg.GetTo(), aln_row.m_Strand) ) { continue; } // Check destination id if ( dst_id ) { if (cvt.m_Dst_id_Handle != dst_id) { align_flags = eAlign_MultiId; } } dst_id = cvt.m_Dst_id_Handle; // At least part of the interval was converted. TSeqPos dl = cvt.m_Src_from <= rg.GetFrom() ? 0 : cvt.m_Src_from - rg.GetFrom(); TSeqPos dr = cvt.m_Src_to >= rg.GetTo() ? 0 : rg.GetTo() - cvt.m_Src_to; if (dl > 0) { // Add segment for the skipped range SAlignment_Segment& lseg = x_InsertSeg(seg_it, dl, seg.m_Rows.size()); for (size_t r = 0; r < seg.m_Rows.size(); ++r) { SAlignment_Segment::SAlignment_Row& lrow = lseg.CopyRow(r, seg.m_Rows[r]); if (r == row) { lrow.m_Start = kInvalidSeqPos; lrow.m_Id = dst_id; } else if (lrow.m_Start != kInvalidSeqPos) { if (lrow.SameStrand(aln_row)) { lrow.m_Start += left_shift; } else { lrow.m_Start += seg.m_Len - lseg.m_Len - left_shift; } } } } left_shift += dl; SAlignment_Segment& mseg = x_InsertSeg(seg_it, rg.GetLength() - dl - dr, seg.m_Rows.size()); if (!dl && !dr) { // copy scores if there's no truncation mseg.SetScores(seg.m_Scores); } for (size_t r = 0; r < seg.m_Rows.size(); ++r) { SAlignment_Segment::SAlignment_Row& mrow = mseg.CopyRow(r, seg.m_Rows[r]); if (r == row) { // translate id and coords mrow.m_Id = cvt.m_Dst_id_Handle; mrow.m_Start = cvt.m_LastRange.GetFrom(); mrow.m_IsSetStrand |= (cvt.m_LastStrand != eNa_strand_unknown); mrow.m_Strand = cvt.m_LastStrand; mrow.SetMapped(); mseg.m_HaveStrands |= mrow.m_IsSetStrand; } else { if (mrow.m_Start != kInvalidSeqPos) { if (mrow.SameStrand(aln_row)) { mrow.m_Start += left_shift; } else { mrow.m_Start += seg.m_Len - left_shift - mseg.m_Len; } } } } cvt.m_LastType = cvt.eMappedObjType_not_set; mapped = true; left_shift += mseg.m_Len; rg.SetFrom(aln_row.m_Start + left_shift); } if (align_flags == eAlign_MultiId && m_AlignFlags == eAlign_Normal) { m_AlignFlags = align_flags; } if ( !mapped ) { // Do not erase the segment, just change the row ID and reset start seg.m_Rows[row].m_Start = kInvalidSeqPos; seg.m_Rows[row].m_Id = rmap.begin()->second->m_Dst_id_Handle; seg.m_Rows[row].SetMapped(); return seg.m_Rows[row].m_Id; } if (rg.GetFrom() <= rg.GetTo()) { // Add the remaining unmapped range SAlignment_Segment& rseg = x_InsertSeg(seg_it, rg.GetLength(), seg.m_Rows.size()); for (size_t r = 0; r < seg.m_Rows.size(); ++r) { SAlignment_Segment::SAlignment_Row& rrow = rseg.CopyRow(r, seg.m_Rows[r]); if (r == row) { rrow.m_Start = kInvalidSeqPos; rrow.m_Id = dst_id; } else if (rrow.m_Start != kInvalidSeqPos) { if (rrow.SameStrand(aln_row)) { rrow.m_Start += left_shift; } } } } m_Segs.erase(old_it); return align_flags == eAlign_MultiId ? CSeq_id_Handle() : dst_id;}// Mapping through CSeq_loc_Mappervoid CSeq_align_Mapper::Convert(CSeq_loc_Mapper& mapper){ m_DstAlign.Reset(); if (m_SubAligns.size() > 0) { NON_CONST_ITERATE(TSubAligns, it, m_SubAligns) { it->Convert(mapper); } return; } x_ConvertAlign(mapper);}void CSeq_align_Mapper::x_ConvertAlign(CSeq_loc_Mapper& mapper){ if (m_Segs.size() == 0) { return; } for (size_t row_idx = 0; row_idx < m_Dim; ++row_idx) { x_ConvertRow(mapper, row_idx); }}void CSeq_align_Mapper::x_ConvertRow(CSeq_loc_Mapper& mapper, size_t row){ CSeq_id_Handle dst_id; TSegments::iterator seg_it = m_Segs.begin(); for ( ; seg_it != m_Segs.end(); ) { if (seg_it->m_Rows.size() <= row) { // No such row in the current segment ++seg_it; m_AlignFlags = eAlign_MultiDim; continue; } CSeq_id_Handle seg_id = x_ConvertSegment(seg_it, mapper, row); if (dst_id) { if (dst_id != seg_id && m_AlignFlags == eAlign_Normal) { m_AlignFlags = eAlign_MultiId; } dst_id = seg_id; } }}CSeq_id_HandleCSeq_align_Mapper::x_ConvertSegment(TSegments::iterator& seg_it, CSeq_loc_Mapper& mapper, size_t row){ TSegments::iterator old_it = seg_it; SAlignment_Segment& seg = *old_it; ++seg_it; SAlignment_Segment::SAlignment_Row& aln_row = seg.m_Rows[row]; if (aln_row.m_Start == kInvalidSeqPos) { // skipped row return aln_row.m_Id; } CSeq_loc_Mapper::TIdMap::iterator id_it = mapper.m_IdMap.find(aln_row.m_Id); if (id_it == mapper.m_IdMap.end()) { // ID not found in the segment, leave the row unchanged return aln_row.m_Id; } CSeq_loc_Mapper::TRangeMap& rmap = id_it->second; if ( rmap.empty() ) { // No mappings for this segment
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?