seq_loc_mapper.cpp
来自「ncbi源码」· C++ 代码 · 共 1,895 行 · 第 1/5 页
CPP
1,895 行
ITERATE(TDstRanges, rg_it, id_it->second) { // Collect and merge ranges if (dst_start == kInvalidSeqPos) { dst_start = rg_it->GetFrom(); dst_stop = rg_it->GetTo(); continue; } if (rg_it->GetFrom() <= dst_stop + 1) { // overlapping or abutting ranges, continue collecting dst_stop = max(dst_stop, rg_it->GetTo()); continue; } // Separate ranges, add conversion and restart collecting CRef<CMappingRange> cvt(new CMappingRange( id_it->first, dst_start, dst_stop - dst_start + 1, ENa_strand(str_idx), id_it->first, dst_start, ENa_strand(str_idx))); m_IdMap[cvt->m_Src_id_Handle].insert(TRangeMap::value_type( TRange(cvt->m_Src_from, cvt->m_Src_to), cvt)); } if (dst_start < dst_stop) { CRef<CMappingRange> cvt(new CMappingRange( id_it->first, dst_start, dst_stop - dst_start + 1, ENa_strand(str_idx), id_it->first, dst_start, ENa_strand(str_idx))); m_IdMap[cvt->m_Src_id_Handle].insert(TRangeMap::value_type( TRange(cvt->m_Src_from, cvt->m_Src_to), cvt)); } } } m_DstRanges.clear();}CSeq_loc_Mapper::TMappedRanges&CSeq_loc_Mapper::x_GetMappedRanges(const CSeq_id_Handle& id, int strand_idx) const{ TRangesByStrand& str_vec = m_MappedLocs[id]; if ((int)str_vec.size() <= strand_idx) { str_vec.resize(strand_idx + 1); } return str_vec[strand_idx];}void CSeq_loc_Mapper::x_PushRangesToDstMix(void){ if (m_MappedLocs.size() == 0) { return; } // Push everything already mapped to the destination mix CRef<CSeq_loc> loc = x_GetMappedSeq_loc(); if ( !m_Dst_loc ) { m_Dst_loc = loc; return; } if ( !loc->IsNull() ) { x_PushLocToDstMix(loc); }}int CSeq_loc_Mapper::x_CheckSeqWidth(const CSeq_id& id, int width){ if (!m_Scope) { return width; } CBioseq_Handle handle; try { handle = m_Scope->GetBioseqHandle(id); } catch (...) { return width; } if ( !handle ) { return width; } switch ( handle.GetBioseqMolType() ) { case CSeq_inst::eMol_dna: case CSeq_inst::eMol_rna: case CSeq_inst::eMol_na: { if ( width != 3 ) { if ( width ) { // width already set to a different value NCBI_THROW(CLocMapperException, eBadLocation, "Location contains different sequence types"); } width = 3; } break; } case CSeq_inst::eMol_aa: { if ( width != 1 ) { if ( width ) { // width already set to a different value NCBI_THROW(CLocMapperException, eBadLocation, "Location contains different sequence types"); } width = 1; } break; } default: return width; } // Destination width should always be checked first if ( !m_Dst_width ) { m_Dst_width = width; } TWidthFlags wid_cvt = 0; if (width == 1) { wid_cvt |= fWidthProtToNuc; } if (m_Dst_width == 1) { wid_cvt |= fWidthNucToProt; } CConstRef<CSynonymsSet> syns = m_Scope->GetSynonyms(id); ITERATE(CSynonymsSet, syn_it, *syns) { m_Widths[syns->GetSeq_id_Handle(syn_it)] = wid_cvt; } return width;}int CSeq_loc_Mapper::x_CheckSeqWidth(const CSeq_loc& loc, TSeqPos* total_length){ int width = 0; *total_length = 0; for (CSeq_loc_CI it(loc); it; ++it) { *total_length += it.GetRange().GetLength(); if ( !m_Scope ) { continue; // don't check sequence type; } width = x_CheckSeqWidth(it.GetSeq_id(), width); } return width;}TSeqPos CSeq_loc_Mapper::x_GetRangeLength(const CSeq_loc_CI& it){ if (it.IsWhole() && IsReverse(it.GetStrand())) { // For reverse strand we need real interval length, not just "whole" CBioseq_Handle h; if (m_Scope) { h = m_Scope->GetBioseqHandle(it.GetSeq_id()); } if ( !h ) { NCBI_THROW(CLocMapperException, eUnknownLength, "Can not map from minus strand -- " "unknown sequence length"); } return h.GetBioseqLength(); } else { return it.GetRange().GetLength(); }}void CSeq_loc_Mapper::x_AddConversion(const CSeq_id& src_id, TSeqPos src_start, ENa_strand src_strand, const CSeq_id& dst_id, TSeqPos dst_start, ENa_strand dst_strand, TSeqPos length){ if (m_DstRanges.size() <= size_t(dst_strand)) { m_DstRanges.resize(size_t(dst_strand) + 1); } if (m_Scope) { CConstRef<CSynonymsSet> syns = m_Scope->GetSynonyms(src_id); ITERATE(CSynonymsSet, syn_it, *syns) { CRef<CMappingRange> cvt(new CMappingRange( CSynonymsSet::GetSeq_id_Handle(syn_it), src_start, length, src_strand, CSeq_id_Handle::GetHandle(dst_id), dst_start, dst_strand)); m_IdMap[cvt->m_Src_id_Handle].insert(TRangeMap::value_type( TRange(cvt->m_Src_from, cvt->m_Src_to), cvt)); } CConstRef<CSynonymsSet> dst_syns = m_Scope->GetSynonyms(dst_id); ITERATE(CSynonymsSet, dst_syn_it, *dst_syns) { m_DstRanges[size_t(dst_strand)] [CSynonymsSet::GetSeq_id_Handle(dst_syn_it)] .push_back(TRange(dst_start, dst_start + length - 1)); } } else { CRef<CMappingRange> cvt(new CMappingRange( CSeq_id_Handle::GetHandle(src_id), src_start, length, src_strand, CSeq_id_Handle::GetHandle(dst_id), dst_start, dst_strand)); m_IdMap[cvt->m_Src_id_Handle].insert(TRangeMap::value_type( TRange(cvt->m_Src_from, cvt->m_Src_to), cvt)); m_DstRanges[size_t(dst_strand)][CSeq_id_Handle::GetHandle(dst_id)] .push_back(TRange(dst_start, dst_start + length - 1)); }}void CSeq_loc_Mapper::x_NextMappingRange(const CSeq_id& src_id, TSeqPos& src_start, TSeqPos& src_len, ENa_strand src_strand, const CSeq_id& dst_id, TSeqPos& dst_start, TSeqPos& dst_len, ENa_strand dst_strand){ TSeqPos cvt_src_start = src_start; TSeqPos cvt_dst_start = dst_start; TSeqPos cvt_length; if (src_len == dst_len) { cvt_length = src_len; src_len = 0; dst_len = 0; } else if (src_len > dst_len) { // reverse interval for minus strand if (IsReverse(src_strand)) { cvt_src_start += src_len - dst_len; } else { src_start += dst_len; } cvt_length = dst_len; src_len -= cvt_length; dst_len = 0; } else { // if (src_len < dst_len) // reverse interval for minus strand if ( IsReverse(dst_strand) ) { cvt_dst_start += dst_len - src_len; } else { dst_start += src_len; } cvt_length = src_len; dst_len -= cvt_length; src_len = 0; } x_AddConversion( src_id, cvt_src_start, src_strand, dst_id, cvt_dst_start, dst_strand, cvt_length);}void CSeq_loc_Mapper::x_Initialize(const CSeq_loc& source, const CSeq_loc& target, int frame){ // Check sequence type or total location length to // adjust intervals according to character width. TSeqPos dst_total_len; x_CheckSeqWidth(target, &dst_total_len); TSeqPos src_total_len; int src_width = x_CheckSeqWidth(source, &src_total_len); if (!src_width || !m_Dst_width) { // Try to detect types by lengths if (src_total_len == kInvalidSeqPos || dst_total_len== kInvalidSeqPos) { NCBI_THROW(CLocMapperException, eBadLocation, "Undefined location length -- " "unable to detect sequence type"); } if (src_total_len == dst_total_len) { src_width = 1; m_Dst_width = 1; _ASSERT(!frame); } // truncate incomplete left and right codons else if (src_total_len/3 == dst_total_len) { src_width = 3; m_Dst_width = 1; } else if (dst_total_len/3 == src_total_len) { src_width = 1; m_Dst_width = 3; } else { NCBI_THROW(CAnnotException, eBadLocation, "Wrong location length -- " "unable to detect sequence type"); } } else { if (src_width == m_Dst_width) { src_width = 1; m_Dst_width = 1; _ASSERT(!frame); } } m_UseWidth |= (src_width != m_Dst_width); // Create conversions CSeq_loc_CI src_it(source); CSeq_loc_CI dst_it(target); TSeqPos src_start = src_it.GetRange().GetFrom()*m_Dst_width; TSeqPos src_len = x_GetRangeLength(src_it)*m_Dst_width; TSeqPos dst_start = dst_it.GetRange().GetFrom()*src_width; TSeqPos dst_len = x_GetRangeLength(dst_it)*src_width; if ( frame ) { // ignore pre-frame range if (src_width == 3) { src_start += frame - 1; } if (m_Dst_width == 3) { dst_start += frame - 1; } } while (src_it && dst_it) { x_NextMappingRange( src_it.GetSeq_id(), src_start, src_len, src_it.GetStrand(), dst_it.GetSeq_id(), dst_start, dst_len, dst_it.GetStrand()); if (src_len == 0 && ++src_it) { src_start = src_it.GetRange().GetFrom()*m_Dst_width; src_len = x_GetRangeLength(src_it)*m_Dst_width; } if (dst_len == 0 && ++dst_it) { dst_start = dst_it.GetRange().GetFrom()*src_width; dst_len = x_GetRangeLength(dst_it)*src_width; } }}void CSeq_loc_Mapper::x_Initialize(const CSeq_align& map_align, const CSeq_id& to_id){ switch ( map_align.GetSegs().Which() ) { case CSeq_align::C_Segs::e_Dendiag: { const TDendiag& diags = map_align.GetSegs().GetDendiag(); ITERATE(TDendiag, diag_it, diags) { size_t to_row = size_t(-1); for (size_t i = 0; i < (*diag_it)->GetIds().size(); ++i) { // find the first row with required ID // ??? check if there are multiple rows with the same ID? if ( (*diag_it)->GetIds()[i]->Equals(to_id) ) { to_row = i; break; } } if (to_row == size_t(-1)) { NCBI_THROW(CLocMapperException, eBadAlignment, "Target ID not found in the alignment"); } x_InitAlign(**diag_it, to_row); } break; } case CSeq_align::C_Segs::e_Denseg: { const CDense_seg& dseg = map_align.GetSegs().GetDenseg(); size_t to_row = size_t(-1); for (size_t i = 0; i < dseg.GetIds().size(); ++i) { if (dseg.GetIds()[i]->Equals(to_id)) { to_row = i; break; } } if (to_row == size_t(-1)) { NCBI_THROW(CLocMapperException, eBadAlignment, "Target ID not found in the alignment"); } x_InitAlign(dseg, to_row); break; } case CSeq_align::C_Segs::e_Std: { const TStd& std_segs = map_align.GetSegs().GetStd(); ITERATE(TStd, std_seg, std_segs) { size_t to_row = size_t(-1);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?