seq_loc_mapper.cpp

来自「ncbi源码」· C++ 代码 · 共 1,895 行 · 第 1/5 页

CPP
1,895
字号
    TSeqPos top_start = kInvalidSeqPos;    TSeqPos top_stop = kInvalidSeqPos;    TSeqPos src_seg_start = kInvalidSeqPos;    TSeqPos last_stop = kInvalidSeqPos;    CConstRef<CSeq_id> src_id;    TSeqPos src_from = 0;    TSeqPos src_len = 0;    TSeqPos dst_from = 0;    TSeqPos dst_len = 0;    CConstRef<CSeq_id> dst_id;    ENa_strand dst_strand = eNa_strand_unknown;    for ( ; seg_it; ++seg_it) {        _ASSERT(seg_it.GetType() == CSeqMap::eSeqRef);        if (seg_it.GetPosition() > top_stop  ||  !src_id) {            // New top-level segment            top_start = seg_it.GetPosition();            top_stop = seg_it.GetEndPosition() - 1;            if (top_id) {                // Top level is a bioseq                src_id.Reset(top_id);                src_seg_start = top_start;            }            else {                // Top level is a seq-loc, positions are                // on the first-level references                src_id = seg_it.GetRefSeqid().GetSeqId();                src_seg_start = seg_it.GetRefPosition();                continue;            }        }        if (seg_it.GetPosition() >= last_stop) {            x_NextMappingRange(*src_id, src_from, src_len, eNa_strand_unknown,                *dst_id, dst_from, dst_len, dst_strand);            _ASSERT(src_len == 0  &&  dst_len == 0);        }        src_from = src_seg_start + seg_it.GetPosition() - top_start;        src_len = seg_it.GetLength();        dst_from = seg_it.GetRefPosition();        dst_len = src_len;        dst_id.Reset(seg_it.GetRefSeqid().GetSeqId());        dst_strand = seg_it.GetRefMinusStrand() ?            eNa_strand_minus : eNa_strand_unknown;        last_stop = seg_it.GetEndPosition();    }    if (src_len > 0) {        x_NextMappingRange(*src_id, src_from, src_len, eNa_strand_unknown,            *dst_id, dst_from, dst_len, dst_strand);    }}CSeq_loc_Mapper::TRangeIteratorCSeq_loc_Mapper::x_BeginMappingRanges(CSeq_id_Handle id,                                      TSeqPos from,                                      TSeqPos to){    TIdMap::iterator ranges = m_IdMap.find(id);    if (ranges == m_IdMap.end()) {        return TRangeIterator();    }    return ranges->second.begin(TRange(from, to));}bool CSeq_loc_Mapper::x_MapInterval(const CSeq_id&   src_id,                                    TRange           src_rg,                                    bool             is_set_strand,                                    ENa_strand       src_strand,                                    TRangeFuzz       orig_fuzz){    bool res = false;    if (m_UseWidth  &&        m_Widths[CSeq_id_Handle::GetHandle(src_id)] & fWidthProtToNuc) {        src_rg = TRange(src_rg.GetFrom()*3, src_rg.GetTo()*3 + 2);    }    CSeq_id_Handle src_idh = CSeq_id_Handle::GetHandle(src_id);    // Sorted mapping ranges    typedef vector< CRef<CMappingRange> > TSortedMappings;    TSortedMappings mappings;    TRangeIterator rg_it = x_BeginMappingRanges(        src_idh, src_rg.GetFrom(), src_rg.GetTo());    for ( ; rg_it; ++rg_it) {        mappings.push_back(rg_it->second);    }    sort(mappings.begin(), mappings.end(), CMappingRangeRef_Less());    TSeqPos last_src_to = 0;    for (size_t idx = 0; idx < mappings.size(); ++idx) {        const CMappingRange& cvt = *mappings[idx];        if ( !cvt.CanMap(src_rg.GetFrom(), src_rg.GetTo(),            is_set_strand, src_strand) ) {            continue;        }        TSeqPos from = src_rg.GetFrom();        TSeqPos to = src_rg.GetTo();        TRangeFuzz fuzz = cvt.Map_Fuzz(orig_fuzz);        if (from < cvt.m_Src_from) {            from = cvt.m_Src_from;            // Set partial only if a non-empty range is to be skipped            if (cvt.m_Src_from > last_src_to + 1) {                if ( cvt.m_Reverse ) {                    if (!fuzz.second) {                        fuzz.second.Reset(new CInt_fuzz);                    }                    fuzz.second->SetLim(CInt_fuzz::eLim_gt);                }                else {                    if (!fuzz.first) {                        fuzz.first.Reset(new CInt_fuzz);                    }                    fuzz.first->SetLim(CInt_fuzz::eLim_lt);                }                m_Partial = true;            }        }        last_src_to = cvt.m_Src_to;        if ( to > cvt.m_Src_to ) {            to = cvt.m_Src_to;            // Set partial only if a non-empty range is to be skipped            if (idx == mappings.size() - 1 ||                mappings[idx+1]->m_Src_from > last_src_to + 1) {                if ( cvt.m_Reverse ) {                    if (!fuzz.first) {                        fuzz.first.Reset(new CInt_fuzz);                    }                    fuzz.first->SetLim(CInt_fuzz::eLim_lt);                }                else {                    if (!fuzz.second) {                        fuzz.second.Reset(new CInt_fuzz);                    }                    fuzz.second->SetLim(CInt_fuzz::eLim_gt);                }                m_Partial = true;            }        }        if ( from > to ) {            continue;        }        TRange rg = cvt.Map_Range(from, to);        ENa_strand dst_strand;        bool is_set_dst_strand = cvt.Map_Strand(is_set_strand,            src_strand, &dst_strand);        x_GetMappedRanges(cvt.m_Dst_id_Handle,            is_set_dst_strand ? int(dst_strand) + 1 : 0)            .push_back(TRangeWithFuzz(rg, fuzz));        res = true;    }    return res;}void CSeq_loc_Mapper::x_PushLocToDstMix(CRef<CSeq_loc> loc){    _ASSERT(loc);    if ( !m_Dst_loc  ||  !m_Dst_loc->IsMix() ) {        CRef<CSeq_loc> tmp = m_Dst_loc;        m_Dst_loc.Reset(new CSeq_loc);        m_Dst_loc->SetMix();        if ( tmp ) {            m_Dst_loc->SetMix().Set().push_back(tmp);        }    }    CSeq_loc_mix::Tdata& mix = m_Dst_loc->SetMix().Set();    if ( loc->IsNull() ) {        if ( m_GapFlag == eGapRemove ) {            return;        }        if ( mix.size() > 0  &&  (*mix.rbegin())->IsNull() ) {            // do not create duplicate NULLs            return;        }    }    mix.push_back(loc);}CRef<CSeq_loc> CSeq_loc_Mapper::Map(const CSeq_loc& src_loc){    m_Dst_loc.Reset();    m_Partial = false; // reset for each location    x_MapSeq_loc(src_loc);    x_PushRangesToDstMix();    x_OptimizeSeq_loc(m_Dst_loc);    return m_Dst_loc;}CRef<CSeq_align> CSeq_loc_Mapper::Map(const CSeq_align& src_align){    m_Dst_loc.Reset();    m_Partial = false; // reset for each location    return x_MapSeq_align(src_align);}void CSeq_loc_Mapper::x_MapSeq_loc(const CSeq_loc& src_loc){    switch ( src_loc.Which() ) {    case CSeq_loc::e_Null:        if (m_GapFlag == eGapRemove) {            return;        }        // Proceed to seq-loc duplication    case CSeq_loc::e_not_set:    case CSeq_loc::e_Feat:    {        x_PushRangesToDstMix();        CRef<CSeq_loc> loc(new CSeq_loc);        loc->Assign(src_loc);        x_PushLocToDstMix(loc);        break;    }    case CSeq_loc::e_Empty:    {        bool res = false;        TRangeIterator mit = x_BeginMappingRanges(            CSeq_id_Handle::GetHandle(src_loc.GetEmpty()),            TRange::GetWhole().GetFrom(),            TRange::GetWhole().GetTo());        for ( ; mit; ++mit) {            CMappingRange& cvt = *mit->second;            if ( cvt.GoodSrcId(src_loc.GetEmpty()) ) {                x_GetMappedRanges(                    CSeq_id_Handle::GetHandle(src_loc.GetEmpty()), 0)                    .push_back(TRangeWithFuzz(TRange::GetEmpty(),                    TRangeFuzz(kEmptyFuzz, kEmptyFuzz)));                res = true;                break;            }        }        if ( !res ) {            if ( m_KeepNonmapping ) {                x_PushRangesToDstMix();                CRef<CSeq_loc> loc(new CSeq_loc);                loc->Assign(src_loc);                x_PushLocToDstMix(loc);            }            else {                m_Partial = true;            }        }        break;    }    case CSeq_loc::e_Whole:    {        const CSeq_id& src_id = src_loc.GetWhole();        // Convert to the allowed master seq interval        TSeqPos src_to = kInvalidSeqPos;        if ( m_Scope ) {            CBioseq_Handle bh = m_Scope->GetBioseqHandle(src_id);            src_to = bh ? bh.GetBioseqLength() : kInvalidSeqPos;        }        bool res = x_MapInterval(src_id, TRange(0, src_to),            false, eNa_strand_unknown,            TRangeFuzz(kEmptyFuzz, kEmptyFuzz));        if ( !res ) {            if ( m_KeepNonmapping ) {                x_PushRangesToDstMix();                CRef<CSeq_loc> loc(new CSeq_loc);                loc->Assign(src_loc);                x_PushLocToDstMix(loc);            }            else {                m_Partial = true;            }        }        break;    }    case CSeq_loc::e_Int:    {        const CSeq_interval& src_int = src_loc.GetInt();        TRangeFuzz fuzz(kEmptyFuzz, kEmptyFuzz);        if ( src_int.IsSetFuzz_from() ) {            fuzz.first.Reset(new CInt_fuzz);            fuzz.first->Assign(src_int.GetFuzz_from());        }        if ( src_int.IsSetFuzz_to() ) {            fuzz.second.Reset(new CInt_fuzz);            fuzz.second->Assign(src_int.GetFuzz_to());        }        bool res = x_MapInterval(src_int.GetId(),            TRange(src_int.GetFrom(), src_int.GetTo()),            src_int.IsSetStrand(),            src_int.IsSetStrand() ? src_int.GetStrand() : eNa_strand_unknown,            fuzz);        if ( !res ) {            if ( m_KeepNonmapping ) {                x_PushRangesToDstMix();                CRef<CSeq_loc> loc(new CSeq_loc);                loc->Assign(src_loc);                x_PushLocToDstMix(loc);            }            else {                m_Partial = true;            }        }        break;    }    case CSeq_loc::e_Pnt:    {        const CSeq_point& pnt = src_loc.GetPnt();        TRangeFuzz fuzz(kEmptyFuzz, kEmptyFuzz);        if ( pnt.IsSetFuzz() ) {            fuzz.first.Reset(new CInt_fuzz);            fuzz.first->Assign(pnt.GetFuzz());        }        bool res = x_MapInterval(pnt.GetId(),            TRange(pnt.GetPoint(), pnt.GetPoint()),            pnt.IsSetStrand(),            pnt.IsSetStrand() ? pnt.GetStrand() : eNa_strand_unknown,            fuzz);        if ( !res ) {            if ( m_KeepNonmapping ) {                x_PushRangesToDstMix();                CRef<CSeq_loc> loc(new CSeq_loc);                loc->Assign(src_loc);                x_PushLocToDstMix(loc);            }            else {                m_Partial = true;            }        }        break;    }    case CSeq_loc::e_Packed_int:    {        const CPacked_seqint::Tdata& src_ints = src_loc.GetPacked_int().Get();        ITERATE ( CPacked_seqint::Tdata, i, src_ints ) {            const CSeq_interval& si = **i;            TRangeFuzz fuzz(kEmptyFuzz, kEmptyFuzz);            if ( si.IsSetFuzz_from() ) {                fuzz.first.Reset(new CInt_fuzz);                fuzz.first->Assign(si.GetFuzz_from());            }            if ( si.IsSetFuzz_to() ) {                fuzz.second.Reset(new CInt_fuzz);                fuzz.second->Assign(si.GetFuzz_to());            }            bool res = x_MapInterval(si.GetId(),                TRange(si.GetFrom(), si.GetTo()),                si.IsSetStrand(),                si.IsSetStrand() ? si.GetStrand() : eNa_strand_unknown,                fuzz);            if ( !res ) {                if ( m_KeepNonmapping ) {                    x_PushRangesToDstMix();                    x_GetMappedRanges(CSeq_id_Handle::GetHandle(si.GetId()),                        si.IsSetStrand() ? int(si.GetStrand()) + 1 : 0)                        .push_back(TRangeWithFuzz(                        TRange(si.GetFrom(), si.GetTo()), fuzz));                }                else {                    m_Partial = true;                }            }        }        break;    }    case CSeq_loc::e_Packed_pnt:    {        const CPacked_seqpnt& src_pack_pnts = src_loc.GetPacked_pnt();        const CPacked_seqpnt::TPoints& src_pnts = src_pack_pnts.GetPoints();        ITERATE ( CPacked_seqpnt::TPoints, i, src_pnts ) {            TRangeFuzz fuzz(kEmptyFuzz, kEmptyFuzz);            if ( src_pack_pnts.IsSetFuzz() ) {                fuzz.first.Reset(new CInt_fuzz);                fuzz.first->Assign(src_pack_pnts.GetFuzz());            }            bool res = x_MapInterval(                src_pack_pnts.GetId(),                TRange(*i, *i), src_pack_pnts.IsSetStrand(),                src_pack_pnts.IsSetStrand() ?                src_pack_pnts.GetStrand() : eNa_strand_unknown,                fuzz);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?