sequence.cpp

来自「ncbi源码」· C++ 代码 · 共 2,155 行 · 第 1/5 页

CPP
2,155
字号
        }    }    return rl.Resolve(scope, rl_flags);}CRef<CSeq_loc> ProductToSource(const CSeq_feat& feat, const CSeq_loc& prod_loc,                               TP2SFlags flags, CScope* scope){    SRelLoc rl(feat.GetProduct(), prod_loc, scope);    _ASSERT(!rl.m_Ranges.empty());    rl.m_ParentLoc.Reset(&feat.GetLocation());    if (feat.GetData().IsCdregion()) {        // 3:1 ratio        const CCdregion& cds        = feat.GetData().GetCdregion();        int              base_frame = cds.GetFrame();        if (base_frame > 0) {            --base_frame;        }        TSeqPos nuc_length, prot_length;        try {            nuc_length = GetLength(feat.GetLocation(), scope);        } catch (CNoLength) {            nuc_length = numeric_limits<TSeqPos>::max();        }        try {            prot_length = GetLength(feat.GetProduct(), scope);        } catch (CNoLength) {            prot_length = numeric_limits<TSeqPos>::max();        }        NON_CONST_ITERATE(SRelLoc::TRanges, it, rl.m_Ranges) {            _ASSERT( !IsReverse((*it)->GetStrand()) );            TSeqPos from, to;            if ((flags & fP2S_Extend)  &&  (*it)->GetFrom() == 0) {                from = 0;            } else {                from = (*it)->GetFrom() * 3 + base_frame;            }            if ((flags & fP2S_Extend)  &&  (*it)->GetTo() == prot_length - 1) {                to = nuc_length - 1;            } else {                to = (*it)->GetTo() * 3 + base_frame + 2;            }            (*it)->SetFrom(from);            (*it)->SetTo  (to);        }    }    return rl.Resolve(scope);}TSeqPos LocationOffset(const CSeq_loc& outer, const CSeq_loc& inner,                       EOffsetType how, CScope* scope){    SRelLoc rl(outer, inner, scope);    if (rl.m_Ranges.empty()) {        return (TSeqPos)-1;    }    bool want_reverse = false;    {{        bool outer_is_reverse = IsReverse(GetStrand(outer, scope));        switch (how) {        case eOffset_FromStart:            want_reverse = false;            break;        case eOffset_FromEnd:            want_reverse = true;            break;        case eOffset_FromLeft:            want_reverse = outer_is_reverse;            break;        case eOffset_FromRight:            want_reverse = !outer_is_reverse;            break;        }    }}    if (want_reverse) {        return GetLength(outer, scope) - rl.m_Ranges.back()->GetTo();    } else {        return rl.m_Ranges.front()->GetFrom();    }}bool TestForStrands(ENa_strand strand1, ENa_strand strand2){    // Check strands. Overlapping rules for strand:    //   - equal strands overlap    //   - "both" overlaps with any other    //   - "unknown" overlaps with any other except "minus"    return strand1 == strand2        || strand1 == eNa_strand_both        || strand2 == eNa_strand_both        || (strand1 == eNa_strand_unknown  && strand2 != eNa_strand_minus)        || (strand2 == eNa_strand_unknown  && strand1 != eNa_strand_minus);}bool TestForIntervals(CSeq_loc_CI it1, CSeq_loc_CI it2, bool minus_strand){    // Check intervals one by one    while ( it1  &&  it2 ) {        if ( !TestForStrands(it1.GetStrand(), it2.GetStrand())  ||             !it1.GetSeq_id().Equals(it2.GetSeq_id())) {            return false;        }        if ( minus_strand ) {            if (it1.GetRange().GetFrom()  !=  it2.GetRange().GetFrom() ) {                // The last interval from loc2 may be shorter than the                // current interval from loc1                if (it1.GetRange().GetFrom() > it2.GetRange().GetFrom()  ||                    ++it2) {                    return false;                }                break;            }        }        else {            if (it1.GetRange().GetTo()  !=  it2.GetRange().GetTo() ) {                // The last interval from loc2 may be shorter than the                // current interval from loc1                if (it1.GetRange().GetTo() < it2.GetRange().GetTo()  ||                    ++it2) {                    return false;                }                break;            }        }        // Go to the next interval start        if ( !(++it2) ) {            break;        }        if ( !(++it1) ) {            return false; // loc1 has not enough intervals        }        if ( minus_strand ) {            if (it1.GetRange().GetTo() != it2.GetRange().GetTo()) {                return false;            }        }        else {            if (it1.GetRange().GetFrom() != it2.GetRange().GetFrom()) {                return false;            }        }    }    return true;}int x_TestForOverlap_MultiSeq(const CSeq_loc& loc1,                              const CSeq_loc& loc2,                              EOverlapType type){    // Special case of TestForOverlap() - multi-sequences locations    switch (type) {    case eOverlap_Simple:        {            // Find all intersecting intervals            int diff = 0;            for (CSeq_loc_CI li1(loc1); li1; ++li1) {                for (CSeq_loc_CI li2(loc2); li2; ++li2) {                    if ( !li1.GetSeq_id().Equals(li2.GetSeq_id()) ) {                        continue;                    }                    CRange<TSeqPos> rg1 = li1.GetRange();                    CRange<TSeqPos> rg2 = li2.GetRange();                    if ( rg1.GetTo() >= rg2.GetFrom()  &&                         rg1.GetFrom() <= rg2.GetTo() ) {                        diff += abs(int(rg2.GetFrom() - rg1.GetFrom())) +                            abs(int(rg1.GetTo() - rg2.GetTo()));                    }                }            }            return diff ? diff : -1;        }    case eOverlap_Contained:        {            // loc2 is contained in loc1            CHandleRangeMap rm1, rm2;            rm1.AddLocation(loc1);            rm2.AddLocation(loc2);            int diff = 0;            CHandleRangeMap::const_iterator it2 = rm2.begin();            for ( ; it2 != rm2.end(); ++it2) {                CHandleRangeMap::const_iterator it1 =                    rm1.GetMap().find(it2->first);                if (it1 == rm1.end()) {                    // loc2 has region on a sequence not present in loc1                    diff = -1;                    break;                }                CRange<TSeqPos> rg1 = it1->second.GetOverlappingRange();                CRange<TSeqPos> rg2 = it2->second.GetOverlappingRange();                if ( rg1.GetFrom() <= rg2.GetFrom()  &&                    rg1.GetTo() >= rg2.GetTo() ) {                    diff += (rg2.GetFrom() - rg1.GetFrom()) +                        (rg1.GetTo() - rg2.GetTo());                }                else {                    // Found an interval on loc2 not contained in loc1                    diff = -1;                    break;                }            }            return diff;        }    case eOverlap_Contains:        {            // loc2 is contained in loc1            CHandleRangeMap rm1, rm2;            rm1.AddLocation(loc1);            rm2.AddLocation(loc2);            int diff = 0;            CHandleRangeMap::const_iterator it1 = rm1.begin();            for ( ; it1 != rm1.end(); ++it1) {                CHandleRangeMap::const_iterator it2 =                    rm2.GetMap().find(it1->first);                if (it2 == rm2.end()) {                    // loc1 has region on a sequence not present in loc2                    diff = -1;                    break;                }                CRange<TSeqPos> rg1 = it1->second.GetOverlappingRange();                CRange<TSeqPos> rg2 = it2->second.GetOverlappingRange();                if ( rg2.GetFrom() <= rg1.GetFrom()  &&                    rg2.GetTo() >= rg1.GetTo() ) {                    diff += (rg1.GetFrom() - rg2.GetFrom()) +                        (rg2.GetTo() - rg1.GetTo());                }                else {                    // Found an interval on loc1 not contained in loc2                    diff = -1;                    break;                }            }            return diff;        }    case eOverlap_Subset:    case eOverlap_CheckIntervals:    case eOverlap_Interval:        {            // For this types the function should not be called            NCBI_THROW(CObjmgrUtilException, eNotImplemented,                "TestForOverlap() -- error processing multi-ID seq-loc");        }    }    return -1;}int TestForOverlap(const CSeq_loc& loc1,                   const CSeq_loc& loc2,                   EOverlapType type,                   TSeqPos circular_len){    CRange<TSeqPos> rg1, rg2;    bool multi_seq = false;    try {        rg1 = loc1.GetTotalRange();        rg2 = loc2.GetTotalRange();    }    catch (exception&) {        // Can not use total range for multi-sequence locations        if (type == eOverlap_Simple  ||            type == eOverlap_Contained  ||            type == eOverlap_Contains) {            // Can not process circular multi-id locations            if (circular_len != 0  &&  circular_len != kInvalidSeqPos) {                throw;            }            return x_TestForOverlap_MultiSeq(loc1, loc2, type);        }        multi_seq = true;    }    ENa_strand strand1 = GetStrand(loc1);    ENa_strand strand2 = GetStrand(loc2);    if ( !TestForStrands(strand1, strand2) )        return -1;    switch (type) {    case eOverlap_Simple:        {            if (circular_len != kInvalidSeqPos) {                TSeqPos from1 = loc1.GetStart(circular_len);                TSeqPos from2 = loc2.GetStart(circular_len);                TSeqPos to1 = loc1.GetEnd(circular_len);                TSeqPos to2 = loc2.GetEnd(circular_len);                if (from1 > to1) {                    if (from2 > to2) {                        // Both locations are circular and must intersect at 0                        return abs(int(from2 - from1)) +                            abs(int(to1 - to2));                    }                    else {                        // Only the first location is circular, rg2 may be used                        // for the second one.                        int loc_len = rg2.GetLength() + loc1.GetCircularLength(circular_len);                        if (from1 < rg2.GetFrom()  ||  to1 > rg2.GetTo()) {                            // loc2 is completely in loc1                            return loc_len - 2*rg2.GetLength();                        }                        if (from1 < rg2.GetTo()) {                            return loc_len - (rg2.GetTo() - from1);                        }                        if (rg2.GetFrom() < to1) {                            return loc_len - (to1 - rg2.GetFrom());                        }                        return -1;                    }                }                else if (from2 > to2) {                    // Only the second location is circular                    int loc_len = rg1.GetLength() + loc2.GetCircularLength(circular_len);                    if (from2 < rg1.GetFrom()  ||  to2 > rg1.GetTo()) {                        // loc2 is completely in loc1                        return loc_len - 2*rg1.GetLength();                    }                    if (from2 < rg1.GetTo()) {                        return loc_len - (rg1.GetTo() - from2);                    }                    if (rg1.GetFrom() < to2) {                        return loc_len - (to2 - rg1.GetFrom());                    }                    return -1;                }                // Locations are not circular, proceed to normal calculations            }            if ( rg1.GetTo() >= rg2.GetFrom()  &&                rg1.GetFrom() <= rg2.GetTo() ) {                return abs(int(rg2.GetFrom() - rg1.GetFrom())) +                    abs(int(rg1.GetTo() - rg2.GetTo()));            }            return -1;        }    case eOverlap_Contained:        {            if (circular_len != kInvalidSeqPos) {                TSeqPos from1 = loc1.GetStart(circular_len);                TSeqPos from2 = loc2.GetStart(circular_len);                TSeqPos to1 = loc1.GetEnd(circular_len);                TSeqPos to2 = loc2.GetEnd(circular_len);                if (from1 > to1) {                    if (from2 > to2) {                        return (from1 <= from2  &&  to1 >= to2) ?                            (from2 - from1) - (to1 - to2) : -1;                    }                    else {                        if (rg2.GetFrom() >= from1  ||  rg2.GetTo() <= to1) {                            return loc1.GetCircularLength(circular_len) -                                rg2.GetLength();                        }                        return -1;                    }                }                else if (from2 > to2) {                    // Non-circular location can not contain a circular one                    return -1;                }            }            if ( rg1.GetFrom() <= rg2.GetFrom()  &&                rg1.GetTo() >= rg2.GetTo() ) {                return (rg2.GetFrom() - rg1.GetFrom()) +                    (rg1.GetTo() - rg2.GetTo());            }            return -1;        }    case eOverlap_Contains:        {            if (circular_len != kInvalidSeqPos) {                TSeqPos from1 = loc1.GetStart(circular_len);                TSeqPos from2 = loc2.GetStart(circular_len);                TSeqPos to1 = loc1.GetEnd(circular_len);                TSeqPos to2 = loc2.GetEnd(circular_len);                if (from1 > to1) {                    if (from2 > to2) {                        return (from2 <= from1  &&  to2 >= to1) ?                            (from1 - from2) - (to2 - to1) : -1;                    }                    else {                        // Non-circular location can not contain a circular one                        return -1;                    }                }                else if (from2 > to2) {                    if (rg1.GetFrom() >= from2  ||  rg1.GetTo() <= to2) {                        return loc2.GetCircularLength(circular_len) -                            rg1.GetLength();                    }                    return -1;                }            }            if ( rg2.GetFrom() <= rg1.GetFrom()  &&                rg2.GetTo() >= rg1.GetTo() ) {                return (rg1.GetFrom() - rg2.GetFrom()) +                    (rg2.GetTo() - rg1.GetTo());            }            return -1;        }    case eOverlap_Subset:        {            // loc1 should contain loc2            if ( Compare(loc1, loc2) != eContains ) {                return -1;            }            return GetLength(loc1) - GetLength(loc2);        }    case eOverlap_CheckIntervals:        {            if ( !multi_seq  &&                (rg1.GetFrom() > rg2.GetTo()  || rg1.GetTo() < rg2.GetTo()) ) {                return -1;            }            // Check intervals' boundaries            CSeq_loc_CI it1(loc1);            CSeq_loc_CI it2(loc2);            if (!it1  ||  !it2) {                break;            }            // check case when strand is minus            if (it2.GetStrand() == eNa_strand_minus) {                // The first interval should be treated as the last one                // for minuns strands.                TSeqPos loc2end = it2.GetRange().GetTo();                TSeqPos loc2start = it2.GetRange().GetFrom();                // Find the first interval in loc1 intersecting with loc2                for ( ; it1  &&  it1.GetRange().GetTo() >= loc2start; ++it1) {                    if (it1.GetRange().GetTo() >= loc2end  &&                        TestForIntervals(it1, it2,

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?