seq_loc_mapper.cpp
来自「ncbi源码」· C++ 代码 · 共 1,895 行 · 第 1/5 页
CPP
1,895 行
for (size_t i = 0; i < (*std_seg)->GetIds().size(); ++i) { if ((*std_seg)->GetIds()[i]->Equals(to_id)) { to_row = i; break; } } if (to_row == size_t(-1)) { NCBI_THROW(CLocMapperException, eBadAlignment, "Target ID not found in the alignment"); } x_InitAlign(**std_seg, to_row); } break; } case CSeq_align::C_Segs::e_Packed: { const CPacked_seg& pseg = map_align.GetSegs().GetPacked(); size_t to_row = size_t(-1); for (size_t i = 0; i < pseg.GetIds().size(); ++i) { if (pseg.GetIds()[i]->Equals(to_id)) { to_row = i; break; } } if (to_row == size_t(-1)) { NCBI_THROW(CLocMapperException, eBadAlignment, "Target ID not found in the alignment"); } x_InitAlign(pseg, to_row); break; } case CSeq_align::C_Segs::e_Disc: { const CSeq_align_set& aln_set = map_align.GetSegs().GetDisc(); ITERATE(CSeq_align_set::Tdata, aln, aln_set.Get()) { x_Initialize(**aln, to_id); } break; } default: NCBI_THROW(CLocMapperException, eBadAlignment, "Unsupported alignment type"); }}void CSeq_loc_Mapper::x_Initialize(const CSeq_align& map_align, size_t to_row){ switch ( map_align.GetSegs().Which() ) { case CSeq_align::C_Segs::e_Dendiag: { const TDendiag& diags = map_align.GetSegs().GetDendiag(); ITERATE(TDendiag, diag_it, diags) { x_InitAlign(**diag_it, to_row); } break; } case CSeq_align::C_Segs::e_Denseg: { const CDense_seg& dseg = map_align.GetSegs().GetDenseg(); x_InitAlign(dseg, to_row); break; } case CSeq_align::C_Segs::e_Std: { const TStd& std_segs = map_align.GetSegs().GetStd(); ITERATE(TStd, std_seg, std_segs) { x_InitAlign(**std_seg, to_row); } break; } case CSeq_align::C_Segs::e_Packed: { const CPacked_seg& pseg = map_align.GetSegs().GetPacked(); x_InitAlign(pseg, to_row); break; } case CSeq_align::C_Segs::e_Disc: { // The same row in each sub-alignment? const CSeq_align_set& aln_set = map_align.GetSegs().GetDisc(); ITERATE(CSeq_align_set::Tdata, aln, aln_set.Get()) { x_Initialize(**aln, to_row); } break; } default: NCBI_THROW(CLocMapperException, eBadAlignment, "Unsupported alignment type"); }}void CSeq_loc_Mapper::x_InitAlign(const CDense_diag& diag, size_t to_row){ if (diag.GetStarts()[to_row] < 0) { // Destination ID is not present in this dense-diag return; } size_t dim = diag.GetDim(); _ASSERT(to_row < dim); if (dim != diag.GetIds().size()) { ERR_POST(Warning << "Invalid 'ids' size in dendiag"); dim = min(dim, diag.GetIds().size()); } if (dim != diag.GetStarts().size()) { ERR_POST(Warning << "Invalid 'starts' size in dendiag"); dim = min(dim, diag.GetStarts().size()); } bool have_strands = diag.IsSetStrands(); if (have_strands && dim != diag.GetStrands().size()) { ERR_POST(Warning << "Invalid 'strands' size in dendiag"); dim = min(dim, diag.GetStrands().size()); } ENa_strand dst_strand = have_strands ? diag.GetStrands()[to_row] : eNa_strand_unknown; const CSeq_id& dst_id = *diag.GetIds()[to_row]; if (!m_Dst_width) { m_Dst_width = x_CheckSeqWidth(dst_id, m_Dst_width); } for (size_t row = 0; row < dim; ++row) { if (row == to_row) { continue; } if (diag.GetStarts()[row] < 0) { continue; } // In alignments with multiple sequence types segment length // should be multiplied by character width. int src_width = 0; const CSeq_id& src_id = *diag.GetIds()[row]; src_width = x_CheckSeqWidth(src_id, src_width); m_UseWidth |= (src_width != m_Dst_width); int dst_width_rel = (m_UseWidth) ? 1 : m_Dst_width; int src_width_rel = (m_UseWidth) ? 1 : src_width; TSeqPos src_start = diag.GetStarts()[row]*dst_width_rel; TSeqPos src_len = diag.GetLen()*src_width*dst_width_rel; TSeqPos dst_start = diag.GetStarts()[to_row]*src_width_rel; TSeqPos dst_len = diag.GetLen()*m_Dst_width*src_width_rel; ENa_strand src_strand = have_strands ? diag.GetStrands()[row] : eNa_strand_unknown; x_NextMappingRange( src_id, src_start, src_len, src_strand, dst_id, dst_start, dst_len, dst_strand); _ASSERT(!src_len && !dst_len); }}void CSeq_loc_Mapper::x_InitAlign(const CDense_seg& denseg, size_t to_row){ size_t dim = denseg.GetDim(); _ASSERT(to_row < dim); size_t numseg = denseg.GetNumseg(); // claimed dimension may not be accurate :-/ if (numseg != denseg.GetLens().size()) { ERR_POST(Warning << "Invalid 'lens' size in denseg"); numseg = min(numseg, denseg.GetLens().size()); } if (dim != denseg.GetIds().size()) { ERR_POST(Warning << "Invalid 'ids' size in denseg"); dim = min(dim, denseg.GetIds().size()); } if (dim*numseg != denseg.GetStarts().size()) { ERR_POST(Warning << "Invalid 'starts' size in denseg"); dim = min(dim*numseg, denseg.GetStarts().size()) / numseg; } bool have_strands = denseg.IsSetStrands(); if (have_strands && dim*numseg != denseg.GetStrands().size()) { ERR_POST(Warning << "Invalid 'strands' size in denseg"); dim = min(dim*numseg, denseg.GetStrands().size()) / numseg; } const CSeq_id& dst_id = *denseg.GetIds()[to_row]; if (!m_Dst_width) { if ( denseg.IsSetWidths() ) { m_Dst_width = denseg.GetWidths()[to_row]; } else { m_Dst_width = x_CheckSeqWidth(dst_id, m_Dst_width); } } for (size_t row = 0; row < dim; ++row) { if (row == to_row) { continue; } const CSeq_id& src_id = *denseg.GetIds()[row]; int src_width = 0; if ( denseg.IsSetWidths() ) { src_width = denseg.GetWidths()[row]; } else { src_width = x_CheckSeqWidth(src_id, src_width); } m_UseWidth |= (src_width != m_Dst_width); int dst_width_rel = (m_UseWidth) ? 1 : m_Dst_width; int src_width_rel = (m_UseWidth) ? 1 : src_width; for (size_t seg = 0; seg < numseg; ++seg) { int i_src_start = denseg.GetStarts()[seg*dim + row]; int i_dst_start = denseg.GetStarts()[seg*dim + to_row]; if (i_src_start < 0 || i_dst_start < 0) { continue; } ENa_strand dst_strand = have_strands ? denseg.GetStrands()[seg*dim + to_row] : eNa_strand_unknown; ENa_strand src_strand = have_strands ? denseg.GetStrands()[seg*dim + row] : eNa_strand_unknown; // In alignments with multiple sequence types segment length // should be multiplied by character width. TSeqPos src_len = denseg.GetLens()[seg]*src_width*dst_width_rel; TSeqPos dst_len = denseg.GetLens()[seg]*m_Dst_width*src_width_rel; TSeqPos src_start = (TSeqPos)(i_src_start)*dst_width_rel; TSeqPos dst_start = (TSeqPos)(i_dst_start)*src_width_rel; x_NextMappingRange( src_id, src_start, src_len, src_strand, dst_id, dst_start, dst_len, dst_strand); _ASSERT(!src_len && !dst_len); } }}void CSeq_loc_Mapper::x_InitAlign(const CStd_seg& sseg, size_t to_row){ size_t dim = sseg.GetDim(); if (dim != sseg.GetLoc().size()) { ERR_POST(Warning << "Invalid 'loc' size in std-seg"); dim = min(dim, sseg.GetLoc().size()); } if (sseg.IsSetIds() && dim != sseg.GetIds().size()) { ERR_POST(Warning << "Invalid 'ids' size in std-seg"); dim = min(dim, sseg.GetIds().size()); } const CSeq_loc& dst_loc = *sseg.GetLoc()[to_row]; for (size_t row = 0; row < dim; ++row ) { if (row == to_row) { continue; } const CSeq_loc& src_loc = *sseg.GetLoc()[row]; if ( src_loc.IsEmpty() ) { // skipped row in this segment continue; } x_Initialize(src_loc, dst_loc); }}void CSeq_loc_Mapper::x_InitAlign(const CPacked_seg& pseg, size_t to_row){ size_t dim = pseg.GetDim(); size_t numseg = pseg.GetNumseg(); // claimed dimension may not be accurate :-/ if (numseg != pseg.GetLens().size()) { ERR_POST(Warning << "Invalid 'lens' size in packed-seg"); numseg = min(numseg, pseg.GetLens().size()); } if (dim != pseg.GetIds().size()) { ERR_POST(Warning << "Invalid 'ids' size in packed-seg"); dim = min(dim, pseg.GetIds().size()); } if (dim*numseg != pseg.GetStarts().size()) { ERR_POST(Warning << "Invalid 'starts' size in packed-seg"); dim = min(dim*numseg, pseg.GetStarts().size()) / numseg; } bool have_strands = pseg.IsSetStrands(); if (have_strands && dim*numseg != pseg.GetStrands().size()) { ERR_POST(Warning << "Invalid 'strands' size in packed-seg"); dim = min(dim*numseg, pseg.GetStrands().size()) / numseg; } const CSeq_id& dst_id = *pseg.GetIds()[to_row]; if (!m_Dst_width) { m_Dst_width = x_CheckSeqWidth(dst_id, m_Dst_width); } for (size_t row = 0; row < dim; ++row) { if (row == to_row) { continue; } const CSeq_id& src_id = *pseg.GetIds()[row]; int src_width = 0; src_width = x_CheckSeqWidth(src_id, src_width); m_UseWidth |= (src_width != m_Dst_width); int dst_width_rel = (m_UseWidth) ? 1 : m_Dst_width; int src_width_rel = (m_UseWidth) ? 1 : src_width; for (size_t seg = 0; seg < numseg; ++seg) { if (!pseg.GetPresent()[row] || !pseg.GetPresent()[to_row]) { continue; } ENa_strand dst_strand = have_strands ? pseg.GetStrands()[seg*dim + to_row] : eNa_strand_unknown; ENa_strand src_strand = have_strands ? pseg.GetStrands()[seg*dim + row] : eNa_strand_unknown; // In alignments with multiple sequence types segment length // should be multiplied by character width. TSeqPos src_len = pseg.GetLens()[seg]*src_width*dst_width_rel; TSeqPos dst_len = pseg.GetLens()[seg]*m_Dst_width*src_width_rel; TSeqPos src_start = pseg.GetStarts()[seg*dim + row]*dst_width_rel; TSeqPos dst_start = pseg.GetStarts()[seg*dim + to_row]*src_width_rel; x_NextMappingRange( src_id, src_start, src_len, src_strand, dst_id, dst_start, dst_len, dst_strand); _ASSERT(!src_len && !dst_len); } }}void CSeq_loc_Mapper::x_Initialize(const CSeqMap& seq_map, const CSeq_id* top_id){ CSeqMap::const_iterator seg_it = seq_map.begin_resolved(m_Scope.GetPointerOrNull(), size_t(-1), CSeqMap::fFindRef); TSeqPos top_start = kInvalidSeqPos; TSeqPos top_stop = kInvalidSeqPos; TSeqPos dst_seg_start = kInvalidSeqPos; CConstRef<CSeq_id> dst_id; for ( ; seg_it; ++seg_it) { _ASSERT(seg_it.GetType() == CSeqMap::eSeqRef); if (seg_it.GetPosition() > top_stop || !dst_id) { // New top-level segment top_start = seg_it.GetPosition(); top_stop = seg_it.GetEndPosition() - 1; if (top_id) { // Top level is a bioseq dst_id.Reset(top_id); dst_seg_start = top_start; } else { // Top level is a seq-loc, positions are // on the first-level references dst_id = seg_it.GetRefSeqid().GetSeqId(); dst_seg_start = seg_it.GetRefPosition(); continue; } } // when top_id is set, destination position = GetPosition(), // else it needs to be calculated from top_start/stop and dst_start/stop. TSeqPos dst_from = dst_seg_start + seg_it.GetPosition() - top_start; _ASSERT(dst_from >= dst_seg_start); TSeqPos dst_len = seg_it.GetLength(); CConstRef<CSeq_id> src_id(seg_it.GetRefSeqid().GetSeqId()); TSeqPos src_from = seg_it.GetRefPosition(); TSeqPos src_len = dst_len; ENa_strand src_strand = seg_it.GetRefMinusStrand() ? eNa_strand_minus : eNa_strand_unknown; x_NextMappingRange(*src_id, src_from, src_len, src_strand, *dst_id, dst_from, dst_len, eNa_strand_unknown); _ASSERT(src_len == 0 && dst_len == 0); }}void CSeq_loc_Mapper::x_Initialize(const CSeqMap& seq_map, size_t depth, const CSeq_id* top_id){ CSeqMap::const_iterator seg_it = seq_map.begin_resolved(m_Scope.GetPointerOrNull(), depth, CSeqMap::fFindRef);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?