seq_align_mapper.cpp
来自「ncbi源码」· C++ 代码 · 共 1,238 行 · 第 1/3 页
CPP
1,238 行
return aln_row.m_Id; } // Sorted mappings typedef vector< CRef<CMappingRange> > TSortedMappings; TSortedMappings mappings; CSeq_loc_Mapper::TRangeMap::iterator rg_it = rmap.begin(); for ( ; rg_it; ++rg_it) { mappings.push_back(rg_it->second); } sort(mappings.begin(), mappings.end(), CMappingRangeRef_Less()); bool mapped = false; CSeq_id_Handle dst_id; EAlignFlags align_flags = eAlign_Normal; int width_flag = mapper.m_UseWidth ? mapper.m_Widths[aln_row.m_Id] : 0; int dst_width = width_flag ? 3 : 1; TSeqPos start = aln_row.m_Start; if (width_flag & CSeq_loc_Mapper::fWidthProtToNuc) { dst_width = 1; if (aln_row.m_Width == 3) { start *= 3; } } else if (!width_flag && aln_row.m_Width == 3) { dst_width = 3; start *= 3; } TSeqPos stop = start + seg.m_Len - 1; TSeqPos left_shift = 0; for (size_t map_idx = 0; map_idx < mappings.size(); ++map_idx) { CRef<CMappingRange> mapping(mappings[map_idx]); // Adjust mapping coordinates according to width if (width_flag & CSeq_loc_Mapper::fWidthProtToNuc) { if (width_flag & CSeq_loc_Mapper::fWidthNucToProt) { // Copy mapping mapping.Reset(new CMappingRange(*mappings[map_idx])); mapping->m_Src_from *= 3; mapping->m_Src_to = (mapping->m_Src_to + 1)*3 - 1; mapping->m_Dst_from *= 3; } } else if (!width_flag && aln_row.m_Width == 3) { // Copy mapping mapping.Reset(new CMappingRange(*mappings[map_idx])); mapping->m_Src_from *= 3; mapping->m_Src_to = (mapping->m_Src_to + 1)*3 - 1; mapping->m_Dst_from *= 3; } if (!mapping->CanMap(start, stop, aln_row.m_IsSetStrand, aln_row.m_Strand)) { // Mapping does not apply to this segment/row continue; } // Check destination id if ( dst_id ) { if (mapping->m_Dst_id_Handle != dst_id) { align_flags = eAlign_MultiId; } } dst_id = mapping->m_Dst_id_Handle; // At least part of the interval was converted. Calculate // trimming coords, trim each row. TSeqPos dl = mapping->m_Src_from <= start ? 0 : mapping->m_Src_from - start; TSeqPos dr = mapping->m_Src_to >= stop ? 0 : stop - mapping->m_Src_to; if (dl > 0) { // Add segment for the skipped range SAlignment_Segment& lseg = x_InsertSeg(seg_it, dl, seg.m_Rows.size()); for (size_t r = 0; r < seg.m_Rows.size(); ++r) { SAlignment_Segment::SAlignment_Row& lrow = lseg.CopyRow(r, seg.m_Rows[r]); if (r == row) { lrow.m_Start = kInvalidSeqPos; lrow.m_Id = dst_id; } else if (lrow.m_Start != kInvalidSeqPos) { int width = seg.m_Rows[r].m_Width ? seg.m_Rows[r].m_Width : 1; if (lrow.SameStrand(aln_row)) { lrow.m_Start += left_shift/width; } else { lrow.m_Start += (seg.m_Len - lseg.m_Len - left_shift)/width; } } } } start += dl; left_shift += dl; // At least part of the interval was converted. SAlignment_Segment& mseg = x_InsertSeg(seg_it, stop - dr - start + 1, seg.m_Rows.size()); if (!dl && !dr) { // copy scores if there's no truncation mseg.SetScores(seg.m_Scores); } for (size_t r = 0; r < seg.m_Rows.size(); ++r) { SAlignment_Segment::SAlignment_Row& mrow = mseg.CopyRow(r, seg.m_Rows[r]); if (r == row) { // translate id and coords CMappingRange::TRange mapped_rg = mapping->Map_Range(start, stop - dr); ENa_strand dst_strand = eNa_strand_unknown; mapping->Map_Strand( aln_row.m_IsSetStrand, aln_row.m_Strand, &dst_strand); mrow.m_Id = mapping->m_Dst_id_Handle; mrow.m_Start = mapped_rg.GetFrom()/dst_width; mrow.m_IsSetStrand |= (dst_strand != eNa_strand_unknown); mrow.m_Strand = dst_strand; mrow.SetMapped(); mseg.m_HaveStrands |= mrow.m_IsSetStrand; } else { if (mrow.m_Start != kInvalidSeqPos) { int width = seg.m_Rows[r].m_Width ? seg.m_Rows[r].m_Width : 1; if (mrow.SameStrand(aln_row)) { mrow.m_Start += left_shift/width; } else { mrow.m_Start += (seg.m_Len - left_shift - mseg.m_Len)/width; } } } } left_shift += mseg.m_Len; start += mseg.m_Len; mapped = true; } if (align_flags == eAlign_MultiId && m_AlignFlags == eAlign_Normal) { m_AlignFlags = align_flags; } if ( !mapped ) { // Do not erase the segment, just change the row ID and reset start seg.m_Rows[row].m_Start = kInvalidSeqPos; seg.m_Rows[row].m_Id = rmap.begin()->second->m_Dst_id_Handle; seg.m_Rows[row].SetMapped(); return seg.m_Rows[row].m_Id; } if (start <= stop) { // Add the remaining unmapped range SAlignment_Segment& rseg = x_InsertSeg(seg_it, stop - start + 1, seg.m_Rows.size()); for (size_t r = 0; r < seg.m_Rows.size(); ++r) { SAlignment_Segment::SAlignment_Row& rrow = rseg.CopyRow(r, seg.m_Rows[r]); if (r == row) { rrow.m_Start = kInvalidSeqPos; rrow.m_Id = dst_id; } else if (rrow.m_Start != kInvalidSeqPos) { int width = seg.m_Rows[r].m_Width ? seg.m_Rows[r].m_Width : 1; if (rrow.SameStrand(aln_row)) { rrow.m_Start += left_shift/width; } } } } m_Segs.erase(old_it); return align_flags == eAlign_MultiId ? CSeq_id_Handle() : dst_id;}// Get mapped alignmentvoid CSeq_align_Mapper::x_GetDstDendiag(CRef<CSeq_align>& dst) const{ TDendiag& diags = dst->SetSegs().SetDendiag(); ITERATE(TSegments, seg_it, m_Segs) { const SAlignment_Segment& seg = *seg_it; CRef<CDense_diag> diag(new CDense_diag); diag->SetDim(seg.m_Rows.size()); diag->SetLen(seg.m_Len); ITERATE(SAlignment_Segment::TRows, row, seg.m_Rows) { CRef<CSeq_id> id(new CSeq_id); id.Reset(&const_cast<CSeq_id&>(*row->m_Id.GetSeqId())); diag->SetIds().push_back(id); diag->SetStarts().push_back(row->GetSegStart()); if (seg.m_HaveStrands) { // per-segment strands diag->SetStrands().push_back(row->m_Strand); } } if ( seg.m_Scores.size() ) { diag->SetScores() = seg.m_Scores; } diags.push_back(diag); }}void CSeq_align_Mapper::x_GetDstDenseg(CRef<CSeq_align>& dst) const{ CDense_seg& dseg = dst->SetSegs().SetDenseg(); dseg.SetDim(m_Segs.front().m_Rows.size()); dseg.SetNumseg(m_Segs.size()); if ( m_Segs.front().m_Scores.size() ) { dseg.SetScores() = m_Segs.front().m_Scores; } ITERATE(SAlignment_Segment::TRows, row, m_Segs.front().m_Rows) { CRef<CSeq_id> id(new CSeq_id); id.Reset(&const_cast<CSeq_id&>(*row->m_Id.GetSeqId())); dseg.SetIds().push_back(id); if (m_HaveWidths) { dseg.SetWidths().push_back(row->m_Width); } } ITERATE(TSegments, seg_it, m_Segs) { dseg.SetLens().push_back(seg_it->m_Len); ITERATE(SAlignment_Segment::TRows, row, seg_it->m_Rows) { dseg.SetStarts().push_back(row->GetSegStart()); if (m_HaveStrands) { // per-alignment strands dseg.SetStrands().push_back(row->m_Strand); } } }}void CSeq_align_Mapper::x_GetDstStd(CRef<CSeq_align>& dst) const{ TStd& std_segs = dst->SetSegs().SetStd(); ITERATE(TSegments, seg_it, m_Segs) { CRef<CStd_seg> std_seg(new CStd_seg); std_seg->SetDim(seg_it->m_Rows.size()); if ( seg_it->m_Scores.size() ) { std_seg->SetScores() = seg_it->m_Scores; } ITERATE(SAlignment_Segment::TRows, row, seg_it->m_Rows) { CRef<CSeq_id> id(new CSeq_id); id.Reset(&const_cast<CSeq_id&>(*row->m_Id.GetSeqId())); std_seg->SetIds().push_back(id); CRef<CSeq_loc> loc(new CSeq_loc); if (row->m_Start == kInvalidSeqPos) { // empty loc->SetEmpty(*id); } else { // interval loc->SetInt().SetId(*id); TSeqPos len = seg_it->m_Len; if (row->m_Width) { len /= row->m_Width; } loc->SetInt().SetFrom(row->m_Start); // len may be 0 after dividing by width loc->SetInt().SetTo(row->m_Start + (len ? len - 1 : 0)); if (row->m_IsSetStrand) { loc->SetInt().SetStrand(row->m_Strand); } } std_seg->SetLoc().push_back(loc); } std_segs.push_back(std_seg); }}void CSeq_align_Mapper::x_GetDstPacked(CRef<CSeq_align>& dst) const{ CPacked_seg& pseg = dst->SetSegs().SetPacked(); pseg.SetDim(m_Segs.front().m_Rows.size()); pseg.SetNumseg(m_Segs.size()); if ( m_Segs.front().m_Scores.size() ) { pseg.SetScores() = m_Segs.front().m_Scores; } ITERATE(SAlignment_Segment::TRows, row, m_Segs.front().m_Rows) { CRef<CSeq_id> id(new CSeq_id); id.Reset(&const_cast<CSeq_id&>(*row->m_Id.GetSeqId())); pseg.SetIds().push_back(id); } ITERATE(TSegments, seg_it, m_Segs) { pseg.SetLens().push_back(seg_it->m_Len); ITERATE(SAlignment_Segment::TRows, row, seg_it->m_Rows) { pseg.SetStarts().push_back(row->GetSegStart()); pseg.SetPresent().push_back(row->m_Start != kInvalidSeqPos); if (m_HaveStrands) { pseg.SetStrands().push_back(row->m_Strand); } } }}void CSeq_align_Mapper::x_GetDstDisc(CRef<CSeq_align>& dst) const{ CSeq_align_set::Tdata& data = dst->SetSegs().SetDisc().Set(); ITERATE(TSubAligns, it, m_SubAligns) { data.push_back(it->GetDstAlign()); }}CRef<CSeq_align> CSeq_align_Mapper::GetDstAlign(void) const{ if (m_DstAlign) { return m_DstAlign; } CRef<CSeq_align> dst(new CSeq_align); dst->SetType(m_OrigAlign->GetType()); if (m_OrigAlign->IsSetDim()) { dst->SetDim(m_OrigAlign->GetDim()); } if (m_OrigAlign->IsSetScore()) { dst->SetScore() = m_OrigAlign->GetScore(); } if (m_OrigAlign->IsSetBounds()) { dst->SetBounds() = m_OrigAlign->GetBounds(); } switch ( m_OrigAlign->GetSegs().Which() ) { case CSeq_align::C_Segs::e_Dendiag: { x_GetDstDendiag(dst); break; } case CSeq_align::C_Segs::e_Denseg: { if (m_AlignFlags == eAlign_Normal) { x_GetDstDenseg(dst); } else { x_GetDstDendiag(dst); } break; } case CSeq_align::C_Segs::e_Std: { x_GetDstStd(dst); break; } case CSeq_align::C_Segs::e_Packed: { if (m_AlignFlags == eAlign_Normal) { x_GetDstPacked(dst); } else { x_GetDstDendiag(dst); } break; } case CSeq_align::C_Segs::e_Disc: { x_GetDstDisc(dst); break; } default: { dst->Assign(*m_OrigAlign); break; } } return m_DstAlign = dst;}END_SCOPE(objects)END_NCBI_SCOPE/** ---------------------------------------------------------------------------* $Log: seq_align_mapper.cpp,v $* Revision 1000.1 2004/06/01 19:23:43 gouriano* PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.10** Revision 1.10 2004/05/27 17:52:11 grichenk* Fixed warnings** Revision 1.9 2004/05/26 14:57:47 ucko* Change some types from unsigned int to size_t for consistency with STL* containers' size() methods.** Revision 1.8 2004/05/26 14:29:20 grichenk* Redesigned CSeq_align_Mapper: preserve non-mapping intervals,* fixed strands handling, improved performance.** Revision 1.7 2004/05/21 21:42:12 gorelenk* Added PCH ncbi_pch.hpp** Revision 1.6 2004/05/13 19:10:57 grichenk* Preserve scores in mapped alignments** Revision 1.5 2004/05/10 20:22:24 grichenk* Fixed more warnings (removed inlines)** Revision 1.4 2004/04/12 14:35:59 grichenk* Fixed mapping of alignments between nucleotides and proteins** Revision 1.3 2004/04/07 18:36:12 grichenk* Fixed std-seg mapping** Revision 1.2 2004/04/01 20:11:17 rsmith* add missing break to switch on seq-id types.** Revision 1.1 2004/03/30 15:40:35 grichenk* Initial revision*** ===========================================================================*/
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?