seq_loc_mapper.cpp

来自「ncbi源码」· C++ 代码 · 共 1,895 行 · 第 1/5 页

CPP
1,895
字号
            ITERATE(TDstRanges, rg_it, id_it->second) {                // Collect and merge ranges                if (dst_start == kInvalidSeqPos) {                    dst_start = rg_it->GetFrom();                    dst_stop = rg_it->GetTo();                    continue;                }                if (rg_it->GetFrom() <= dst_stop + 1) {                    // overlapping or abutting ranges, continue collecting                    dst_stop = max(dst_stop, rg_it->GetTo());                    continue;                }                // Separate ranges, add conversion and restart collecting                CRef<CMappingRange> cvt(new CMappingRange(                    id_it->first, dst_start, dst_stop - dst_start + 1,                    ENa_strand(str_idx),                    id_it->first, dst_start, ENa_strand(str_idx)));                m_IdMap[cvt->m_Src_id_Handle].insert(TRangeMap::value_type(                    TRange(cvt->m_Src_from, cvt->m_Src_to), cvt));            }            if (dst_start < dst_stop) {                CRef<CMappingRange> cvt(new CMappingRange(                    id_it->first, dst_start, dst_stop - dst_start + 1,                    ENa_strand(str_idx),                    id_it->first, dst_start, ENa_strand(str_idx)));                m_IdMap[cvt->m_Src_id_Handle].insert(TRangeMap::value_type(                    TRange(cvt->m_Src_from, cvt->m_Src_to), cvt));            }        }    }    m_DstRanges.clear();}CSeq_loc_Mapper::TMappedRanges&CSeq_loc_Mapper::x_GetMappedRanges(const CSeq_id_Handle& id,                                   int strand_idx) const{    TRangesByStrand& str_vec = m_MappedLocs[id];    if ((int)str_vec.size() <= strand_idx) {        str_vec.resize(strand_idx + 1);    }    return str_vec[strand_idx];}void CSeq_loc_Mapper::x_PushRangesToDstMix(void){    if (m_MappedLocs.size() == 0) {        return;    }    // Push everything already mapped to the destination mix    CRef<CSeq_loc> loc = x_GetMappedSeq_loc();    if ( !m_Dst_loc ) {        m_Dst_loc = loc;        return;    }    if ( !loc->IsNull() ) {        x_PushLocToDstMix(loc);    }}int CSeq_loc_Mapper::x_CheckSeqWidth(const CSeq_id& id, int width){    if (!m_Scope) {        return width;    }    CBioseq_Handle handle;    try {        handle = m_Scope->GetBioseqHandle(id);    } catch (...) {        return width;    }    if ( !handle ) {        return width;    }    switch ( handle.GetBioseqMolType() ) {    case CSeq_inst::eMol_dna:    case CSeq_inst::eMol_rna:    case CSeq_inst::eMol_na:        {            if ( width != 3 ) {                if ( width ) {                    // width already set to a different value                    NCBI_THROW(CLocMapperException, eBadLocation,                               "Location contains different sequence types");                }                width = 3;            }            break;        }    case CSeq_inst::eMol_aa:        {            if ( width != 1 ) {                if ( width ) {                    // width already set to a different value                    NCBI_THROW(CLocMapperException, eBadLocation,                               "Location contains different sequence types");                }                width = 1;            }            break;        }    default:        return width;    }    // Destination width should always be checked first    if ( !m_Dst_width ) {        m_Dst_width = width;    }    TWidthFlags wid_cvt = 0;    if (width == 1) {        wid_cvt |= fWidthProtToNuc;    }    if (m_Dst_width == 1) {        wid_cvt |= fWidthNucToProt;    }    CConstRef<CSynonymsSet> syns = m_Scope->GetSynonyms(id);    ITERATE(CSynonymsSet, syn_it, *syns) {        m_Widths[syns->GetSeq_id_Handle(syn_it)] = wid_cvt;    }    return width;}int CSeq_loc_Mapper::x_CheckSeqWidth(const CSeq_loc& loc,                                     TSeqPos* total_length){    int width = 0;    *total_length = 0;    for (CSeq_loc_CI it(loc); it; ++it) {        *total_length += it.GetRange().GetLength();        if ( !m_Scope ) {            continue; // don't check sequence type;        }        width = x_CheckSeqWidth(it.GetSeq_id(), width);    }    return width;}TSeqPos CSeq_loc_Mapper::x_GetRangeLength(const CSeq_loc_CI& it){    if (it.IsWhole()  &&  IsReverse(it.GetStrand())) {        // For reverse strand we need real interval length, not just "whole"        CBioseq_Handle h;        if (m_Scope) {            h = m_Scope->GetBioseqHandle(it.GetSeq_id());        }        if ( !h ) {            NCBI_THROW(CLocMapperException, eUnknownLength,                       "Can not map from minus strand -- "                       "unknown sequence length");        }        return h.GetBioseqLength();    }    else {        return it.GetRange().GetLength();    }}void CSeq_loc_Mapper::x_AddConversion(const CSeq_id& src_id,                                      TSeqPos        src_start,                                      ENa_strand     src_strand,                                      const CSeq_id& dst_id,                                      TSeqPos        dst_start,                                      ENa_strand     dst_strand,                                      TSeqPos        length){    if (m_DstRanges.size() <= size_t(dst_strand)) {        m_DstRanges.resize(size_t(dst_strand) + 1);    }    if (m_Scope) {        CConstRef<CSynonymsSet> syns = m_Scope->GetSynonyms(src_id);        ITERATE(CSynonymsSet, syn_it, *syns) {            CRef<CMappingRange> cvt(new CMappingRange(                CSynonymsSet::GetSeq_id_Handle(syn_it),                src_start, length, src_strand,                CSeq_id_Handle::GetHandle(dst_id), dst_start, dst_strand));            m_IdMap[cvt->m_Src_id_Handle].insert(TRangeMap::value_type(                TRange(cvt->m_Src_from, cvt->m_Src_to), cvt));        }        CConstRef<CSynonymsSet> dst_syns = m_Scope->GetSynonyms(dst_id);        ITERATE(CSynonymsSet, dst_syn_it, *dst_syns) {            m_DstRanges[size_t(dst_strand)]                [CSynonymsSet::GetSeq_id_Handle(dst_syn_it)]                .push_back(TRange(dst_start, dst_start + length - 1));        }    }    else {        CRef<CMappingRange> cvt(new CMappingRange(            CSeq_id_Handle::GetHandle(src_id),            src_start, length, src_strand,            CSeq_id_Handle::GetHandle(dst_id), dst_start, dst_strand));        m_IdMap[cvt->m_Src_id_Handle].insert(TRangeMap::value_type(            TRange(cvt->m_Src_from, cvt->m_Src_to), cvt));        m_DstRanges[size_t(dst_strand)][CSeq_id_Handle::GetHandle(dst_id)]            .push_back(TRange(dst_start, dst_start + length - 1));    }}void CSeq_loc_Mapper::x_NextMappingRange(const CSeq_id& src_id,                                         TSeqPos&       src_start,                                         TSeqPos&       src_len,                                         ENa_strand     src_strand,                                         const CSeq_id& dst_id,                                         TSeqPos&       dst_start,                                         TSeqPos&       dst_len,                                         ENa_strand     dst_strand){    TSeqPos cvt_src_start = src_start;    TSeqPos cvt_dst_start = dst_start;    TSeqPos cvt_length;    if (src_len == dst_len) {        cvt_length = src_len;        src_len = 0;        dst_len = 0;    }    else if (src_len > dst_len) {        // reverse interval for minus strand        if (IsReverse(src_strand)) {            cvt_src_start += src_len - dst_len;        }        else {            src_start += dst_len;        }        cvt_length = dst_len;        src_len -= cvt_length;        dst_len = 0;    }    else { // if (src_len < dst_len)        // reverse interval for minus strand        if ( IsReverse(dst_strand) ) {            cvt_dst_start += dst_len - src_len;        }        else {            dst_start += src_len;        }        cvt_length = src_len;        dst_len -= cvt_length;        src_len = 0;    }    x_AddConversion(        src_id, cvt_src_start, src_strand,        dst_id, cvt_dst_start, dst_strand,        cvt_length);}void CSeq_loc_Mapper::x_Initialize(const CSeq_loc& source,                                   const CSeq_loc& target,                                   int             frame){    // Check sequence type or total location length to    // adjust intervals according to character width.    TSeqPos dst_total_len;    x_CheckSeqWidth(target, &dst_total_len);    TSeqPos src_total_len;    int src_width = x_CheckSeqWidth(source, &src_total_len);    if (!src_width  ||  !m_Dst_width) {        // Try to detect types by lengths        if (src_total_len == kInvalidSeqPos || dst_total_len== kInvalidSeqPos) {            NCBI_THROW(CLocMapperException, eBadLocation,                       "Undefined location length -- "                       "unable to detect sequence type");        }        if (src_total_len == dst_total_len) {            src_width = 1;            m_Dst_width = 1;            _ASSERT(!frame);        }        // truncate incomplete left and right codons        else if (src_total_len/3 == dst_total_len) {            src_width = 3;            m_Dst_width = 1;        }        else if (dst_total_len/3 == src_total_len) {            src_width = 1;            m_Dst_width = 3;        }        else {            NCBI_THROW(CAnnotException, eBadLocation,                       "Wrong location length -- "                       "unable to detect sequence type");        }    }    else {        if (src_width == m_Dst_width) {            src_width = 1;            m_Dst_width = 1;            _ASSERT(!frame);        }    }    m_UseWidth |= (src_width != m_Dst_width);    // Create conversions    CSeq_loc_CI src_it(source);    CSeq_loc_CI dst_it(target);    TSeqPos src_start = src_it.GetRange().GetFrom()*m_Dst_width;    TSeqPos src_len = x_GetRangeLength(src_it)*m_Dst_width;    TSeqPos dst_start = dst_it.GetRange().GetFrom()*src_width;    TSeqPos dst_len = x_GetRangeLength(dst_it)*src_width;    if ( frame ) {        // ignore pre-frame range        if (src_width == 3) {            src_start += frame - 1;        }        if (m_Dst_width == 3) {            dst_start += frame - 1;        }    }    while (src_it  &&  dst_it) {        x_NextMappingRange(            src_it.GetSeq_id(), src_start, src_len, src_it.GetStrand(),            dst_it.GetSeq_id(), dst_start, dst_len, dst_it.GetStrand());        if (src_len == 0  &&  ++src_it) {            src_start = src_it.GetRange().GetFrom()*m_Dst_width;            src_len = x_GetRangeLength(src_it)*m_Dst_width;        }        if (dst_len == 0  &&  ++dst_it) {            dst_start = dst_it.GetRange().GetFrom()*src_width;            dst_len = x_GetRangeLength(dst_it)*src_width;        }    }}void CSeq_loc_Mapper::x_Initialize(const CSeq_align& map_align,                                   const CSeq_id&    to_id){    switch ( map_align.GetSegs().Which() ) {    case CSeq_align::C_Segs::e_Dendiag:        {            const TDendiag& diags = map_align.GetSegs().GetDendiag();            ITERATE(TDendiag, diag_it, diags) {                size_t to_row = size_t(-1);                for (size_t i = 0; i < (*diag_it)->GetIds().size(); ++i) {                    // find the first row with required ID                    // ??? check if there are multiple rows with the same ID?                    if ( (*diag_it)->GetIds()[i]->Equals(to_id) ) {                        to_row = i;                        break;                    }                }                if (to_row == size_t(-1)) {                    NCBI_THROW(CLocMapperException, eBadAlignment,                               "Target ID not found in the alignment");                }                x_InitAlign(**diag_it, to_row);            }            break;        }    case CSeq_align::C_Segs::e_Denseg:        {            const CDense_seg& dseg = map_align.GetSegs().GetDenseg();            size_t to_row = size_t(-1);            for (size_t i = 0; i < dseg.GetIds().size(); ++i) {                if (dseg.GetIds()[i]->Equals(to_id)) {                    to_row = i;                    break;                }            }            if (to_row == size_t(-1)) {                NCBI_THROW(CLocMapperException, eBadAlignment,                           "Target ID not found in the alignment");            }            x_InitAlign(dseg, to_row);            break;        }    case CSeq_align::C_Segs::e_Std:        {            const TStd& std_segs = map_align.GetSegs().GetStd();            ITERATE(TStd, std_seg, std_segs) {                size_t to_row = size_t(-1);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?