seq_loc_mapper.cpp
来自「ncbi源码」· C++ 代码 · 共 1,895 行 · 第 1/5 页
CPP
1,895 行
TSeqPos top_start = kInvalidSeqPos; TSeqPos top_stop = kInvalidSeqPos; TSeqPos src_seg_start = kInvalidSeqPos; TSeqPos last_stop = kInvalidSeqPos; CConstRef<CSeq_id> src_id; TSeqPos src_from = 0; TSeqPos src_len = 0; TSeqPos dst_from = 0; TSeqPos dst_len = 0; CConstRef<CSeq_id> dst_id; ENa_strand dst_strand = eNa_strand_unknown; for ( ; seg_it; ++seg_it) { _ASSERT(seg_it.GetType() == CSeqMap::eSeqRef); if (seg_it.GetPosition() > top_stop || !src_id) { // New top-level segment top_start = seg_it.GetPosition(); top_stop = seg_it.GetEndPosition() - 1; if (top_id) { // Top level is a bioseq src_id.Reset(top_id); src_seg_start = top_start; } else { // Top level is a seq-loc, positions are // on the first-level references src_id = seg_it.GetRefSeqid().GetSeqId(); src_seg_start = seg_it.GetRefPosition(); continue; } } if (seg_it.GetPosition() >= last_stop) { x_NextMappingRange(*src_id, src_from, src_len, eNa_strand_unknown, *dst_id, dst_from, dst_len, dst_strand); _ASSERT(src_len == 0 && dst_len == 0); } src_from = src_seg_start + seg_it.GetPosition() - top_start; src_len = seg_it.GetLength(); dst_from = seg_it.GetRefPosition(); dst_len = src_len; dst_id.Reset(seg_it.GetRefSeqid().GetSeqId()); dst_strand = seg_it.GetRefMinusStrand() ? eNa_strand_minus : eNa_strand_unknown; last_stop = seg_it.GetEndPosition(); } if (src_len > 0) { x_NextMappingRange(*src_id, src_from, src_len, eNa_strand_unknown, *dst_id, dst_from, dst_len, dst_strand); }}CSeq_loc_Mapper::TRangeIteratorCSeq_loc_Mapper::x_BeginMappingRanges(CSeq_id_Handle id, TSeqPos from, TSeqPos to){ TIdMap::iterator ranges = m_IdMap.find(id); if (ranges == m_IdMap.end()) { return TRangeIterator(); } return ranges->second.begin(TRange(from, to));}bool CSeq_loc_Mapper::x_MapInterval(const CSeq_id& src_id, TRange src_rg, bool is_set_strand, ENa_strand src_strand, TRangeFuzz orig_fuzz){ bool res = false; if (m_UseWidth && m_Widths[CSeq_id_Handle::GetHandle(src_id)] & fWidthProtToNuc) { src_rg = TRange(src_rg.GetFrom()*3, src_rg.GetTo()*3 + 2); } CSeq_id_Handle src_idh = CSeq_id_Handle::GetHandle(src_id); // Sorted mapping ranges typedef vector< CRef<CMappingRange> > TSortedMappings; TSortedMappings mappings; TRangeIterator rg_it = x_BeginMappingRanges( src_idh, src_rg.GetFrom(), src_rg.GetTo()); for ( ; rg_it; ++rg_it) { mappings.push_back(rg_it->second); } sort(mappings.begin(), mappings.end(), CMappingRangeRef_Less()); TSeqPos last_src_to = 0; for (size_t idx = 0; idx < mappings.size(); ++idx) { const CMappingRange& cvt = *mappings[idx]; if ( !cvt.CanMap(src_rg.GetFrom(), src_rg.GetTo(), is_set_strand, src_strand) ) { continue; } TSeqPos from = src_rg.GetFrom(); TSeqPos to = src_rg.GetTo(); TRangeFuzz fuzz = cvt.Map_Fuzz(orig_fuzz); if (from < cvt.m_Src_from) { from = cvt.m_Src_from; // Set partial only if a non-empty range is to be skipped if (cvt.m_Src_from > last_src_to + 1) { if ( cvt.m_Reverse ) { if (!fuzz.second) { fuzz.second.Reset(new CInt_fuzz); } fuzz.second->SetLim(CInt_fuzz::eLim_gt); } else { if (!fuzz.first) { fuzz.first.Reset(new CInt_fuzz); } fuzz.first->SetLim(CInt_fuzz::eLim_lt); } m_Partial = true; } } last_src_to = cvt.m_Src_to; if ( to > cvt.m_Src_to ) { to = cvt.m_Src_to; // Set partial only if a non-empty range is to be skipped if (idx == mappings.size() - 1 || mappings[idx+1]->m_Src_from > last_src_to + 1) { if ( cvt.m_Reverse ) { if (!fuzz.first) { fuzz.first.Reset(new CInt_fuzz); } fuzz.first->SetLim(CInt_fuzz::eLim_lt); } else { if (!fuzz.second) { fuzz.second.Reset(new CInt_fuzz); } fuzz.second->SetLim(CInt_fuzz::eLim_gt); } m_Partial = true; } } if ( from > to ) { continue; } TRange rg = cvt.Map_Range(from, to); ENa_strand dst_strand; bool is_set_dst_strand = cvt.Map_Strand(is_set_strand, src_strand, &dst_strand); x_GetMappedRanges(cvt.m_Dst_id_Handle, is_set_dst_strand ? int(dst_strand) + 1 : 0) .push_back(TRangeWithFuzz(rg, fuzz)); res = true; } return res;}void CSeq_loc_Mapper::x_PushLocToDstMix(CRef<CSeq_loc> loc){ _ASSERT(loc); if ( !m_Dst_loc || !m_Dst_loc->IsMix() ) { CRef<CSeq_loc> tmp = m_Dst_loc; m_Dst_loc.Reset(new CSeq_loc); m_Dst_loc->SetMix(); if ( tmp ) { m_Dst_loc->SetMix().Set().push_back(tmp); } } CSeq_loc_mix::Tdata& mix = m_Dst_loc->SetMix().Set(); if ( loc->IsNull() ) { if ( m_GapFlag == eGapRemove ) { return; } if ( mix.size() > 0 && (*mix.rbegin())->IsNull() ) { // do not create duplicate NULLs return; } } mix.push_back(loc);}CRef<CSeq_loc> CSeq_loc_Mapper::Map(const CSeq_loc& src_loc){ m_Dst_loc.Reset(); m_Partial = false; // reset for each location x_MapSeq_loc(src_loc); x_PushRangesToDstMix(); x_OptimizeSeq_loc(m_Dst_loc); return m_Dst_loc;}CRef<CSeq_align> CSeq_loc_Mapper::Map(const CSeq_align& src_align){ m_Dst_loc.Reset(); m_Partial = false; // reset for each location return x_MapSeq_align(src_align);}void CSeq_loc_Mapper::x_MapSeq_loc(const CSeq_loc& src_loc){ switch ( src_loc.Which() ) { case CSeq_loc::e_Null: if (m_GapFlag == eGapRemove) { return; } // Proceed to seq-loc duplication case CSeq_loc::e_not_set: case CSeq_loc::e_Feat: { x_PushRangesToDstMix(); CRef<CSeq_loc> loc(new CSeq_loc); loc->Assign(src_loc); x_PushLocToDstMix(loc); break; } case CSeq_loc::e_Empty: { bool res = false; TRangeIterator mit = x_BeginMappingRanges( CSeq_id_Handle::GetHandle(src_loc.GetEmpty()), TRange::GetWhole().GetFrom(), TRange::GetWhole().GetTo()); for ( ; mit; ++mit) { CMappingRange& cvt = *mit->second; if ( cvt.GoodSrcId(src_loc.GetEmpty()) ) { x_GetMappedRanges( CSeq_id_Handle::GetHandle(src_loc.GetEmpty()), 0) .push_back(TRangeWithFuzz(TRange::GetEmpty(), TRangeFuzz(kEmptyFuzz, kEmptyFuzz))); res = true; break; } } if ( !res ) { if ( m_KeepNonmapping ) { x_PushRangesToDstMix(); CRef<CSeq_loc> loc(new CSeq_loc); loc->Assign(src_loc); x_PushLocToDstMix(loc); } else { m_Partial = true; } } break; } case CSeq_loc::e_Whole: { const CSeq_id& src_id = src_loc.GetWhole(); // Convert to the allowed master seq interval TSeqPos src_to = kInvalidSeqPos; if ( m_Scope ) { CBioseq_Handle bh = m_Scope->GetBioseqHandle(src_id); src_to = bh ? bh.GetBioseqLength() : kInvalidSeqPos; } bool res = x_MapInterval(src_id, TRange(0, src_to), false, eNa_strand_unknown, TRangeFuzz(kEmptyFuzz, kEmptyFuzz)); if ( !res ) { if ( m_KeepNonmapping ) { x_PushRangesToDstMix(); CRef<CSeq_loc> loc(new CSeq_loc); loc->Assign(src_loc); x_PushLocToDstMix(loc); } else { m_Partial = true; } } break; } case CSeq_loc::e_Int: { const CSeq_interval& src_int = src_loc.GetInt(); TRangeFuzz fuzz(kEmptyFuzz, kEmptyFuzz); if ( src_int.IsSetFuzz_from() ) { fuzz.first.Reset(new CInt_fuzz); fuzz.first->Assign(src_int.GetFuzz_from()); } if ( src_int.IsSetFuzz_to() ) { fuzz.second.Reset(new CInt_fuzz); fuzz.second->Assign(src_int.GetFuzz_to()); } bool res = x_MapInterval(src_int.GetId(), TRange(src_int.GetFrom(), src_int.GetTo()), src_int.IsSetStrand(), src_int.IsSetStrand() ? src_int.GetStrand() : eNa_strand_unknown, fuzz); if ( !res ) { if ( m_KeepNonmapping ) { x_PushRangesToDstMix(); CRef<CSeq_loc> loc(new CSeq_loc); loc->Assign(src_loc); x_PushLocToDstMix(loc); } else { m_Partial = true; } } break; } case CSeq_loc::e_Pnt: { const CSeq_point& pnt = src_loc.GetPnt(); TRangeFuzz fuzz(kEmptyFuzz, kEmptyFuzz); if ( pnt.IsSetFuzz() ) { fuzz.first.Reset(new CInt_fuzz); fuzz.first->Assign(pnt.GetFuzz()); } bool res = x_MapInterval(pnt.GetId(), TRange(pnt.GetPoint(), pnt.GetPoint()), pnt.IsSetStrand(), pnt.IsSetStrand() ? pnt.GetStrand() : eNa_strand_unknown, fuzz); if ( !res ) { if ( m_KeepNonmapping ) { x_PushRangesToDstMix(); CRef<CSeq_loc> loc(new CSeq_loc); loc->Assign(src_loc); x_PushLocToDstMix(loc); } else { m_Partial = true; } } break; } case CSeq_loc::e_Packed_int: { const CPacked_seqint::Tdata& src_ints = src_loc.GetPacked_int().Get(); ITERATE ( CPacked_seqint::Tdata, i, src_ints ) { const CSeq_interval& si = **i; TRangeFuzz fuzz(kEmptyFuzz, kEmptyFuzz); if ( si.IsSetFuzz_from() ) { fuzz.first.Reset(new CInt_fuzz); fuzz.first->Assign(si.GetFuzz_from()); } if ( si.IsSetFuzz_to() ) { fuzz.second.Reset(new CInt_fuzz); fuzz.second->Assign(si.GetFuzz_to()); } bool res = x_MapInterval(si.GetId(), TRange(si.GetFrom(), si.GetTo()), si.IsSetStrand(), si.IsSetStrand() ? si.GetStrand() : eNa_strand_unknown, fuzz); if ( !res ) { if ( m_KeepNonmapping ) { x_PushRangesToDstMix(); x_GetMappedRanges(CSeq_id_Handle::GetHandle(si.GetId()), si.IsSetStrand() ? int(si.GetStrand()) + 1 : 0) .push_back(TRangeWithFuzz( TRange(si.GetFrom(), si.GetTo()), fuzz)); } else { m_Partial = true; } } } break; } case CSeq_loc::e_Packed_pnt: { const CPacked_seqpnt& src_pack_pnts = src_loc.GetPacked_pnt(); const CPacked_seqpnt::TPoints& src_pnts = src_pack_pnts.GetPoints(); ITERATE ( CPacked_seqpnt::TPoints, i, src_pnts ) { TRangeFuzz fuzz(kEmptyFuzz, kEmptyFuzz); if ( src_pack_pnts.IsSetFuzz() ) { fuzz.first.Reset(new CInt_fuzz); fuzz.first->Assign(src_pack_pnts.GetFuzz()); } bool res = x_MapInterval( src_pack_pnts.GetId(), TRange(*i, *i), src_pack_pnts.IsSetStrand(), src_pack_pnts.IsSetStrand() ? src_pack_pnts.GetStrand() : eNa_strand_unknown, fuzz);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?