sequence.cpp
来自「ncbi源码」· C++ 代码 · 共 2,155 行 · 第 1/5 页
CPP
2,155 行
} } return rl.Resolve(scope, rl_flags);}CRef<CSeq_loc> ProductToSource(const CSeq_feat& feat, const CSeq_loc& prod_loc, TP2SFlags flags, CScope* scope){ SRelLoc rl(feat.GetProduct(), prod_loc, scope); _ASSERT(!rl.m_Ranges.empty()); rl.m_ParentLoc.Reset(&feat.GetLocation()); if (feat.GetData().IsCdregion()) { // 3:1 ratio const CCdregion& cds = feat.GetData().GetCdregion(); int base_frame = cds.GetFrame(); if (base_frame > 0) { --base_frame; } TSeqPos nuc_length, prot_length; try { nuc_length = GetLength(feat.GetLocation(), scope); } catch (CNoLength) { nuc_length = numeric_limits<TSeqPos>::max(); } try { prot_length = GetLength(feat.GetProduct(), scope); } catch (CNoLength) { prot_length = numeric_limits<TSeqPos>::max(); } NON_CONST_ITERATE(SRelLoc::TRanges, it, rl.m_Ranges) { _ASSERT( !IsReverse((*it)->GetStrand()) ); TSeqPos from, to; if ((flags & fP2S_Extend) && (*it)->GetFrom() == 0) { from = 0; } else { from = (*it)->GetFrom() * 3 + base_frame; } if ((flags & fP2S_Extend) && (*it)->GetTo() == prot_length - 1) { to = nuc_length - 1; } else { to = (*it)->GetTo() * 3 + base_frame + 2; } (*it)->SetFrom(from); (*it)->SetTo (to); } } return rl.Resolve(scope);}TSeqPos LocationOffset(const CSeq_loc& outer, const CSeq_loc& inner, EOffsetType how, CScope* scope){ SRelLoc rl(outer, inner, scope); if (rl.m_Ranges.empty()) { return (TSeqPos)-1; } bool want_reverse = false; {{ bool outer_is_reverse = IsReverse(GetStrand(outer, scope)); switch (how) { case eOffset_FromStart: want_reverse = false; break; case eOffset_FromEnd: want_reverse = true; break; case eOffset_FromLeft: want_reverse = outer_is_reverse; break; case eOffset_FromRight: want_reverse = !outer_is_reverse; break; } }} if (want_reverse) { return GetLength(outer, scope) - rl.m_Ranges.back()->GetTo(); } else { return rl.m_Ranges.front()->GetFrom(); }}bool TestForStrands(ENa_strand strand1, ENa_strand strand2){ // Check strands. Overlapping rules for strand: // - equal strands overlap // - "both" overlaps with any other // - "unknown" overlaps with any other except "minus" return strand1 == strand2 || strand1 == eNa_strand_both || strand2 == eNa_strand_both || (strand1 == eNa_strand_unknown && strand2 != eNa_strand_minus) || (strand2 == eNa_strand_unknown && strand1 != eNa_strand_minus);}bool TestForIntervals(CSeq_loc_CI it1, CSeq_loc_CI it2, bool minus_strand){ // Check intervals one by one while ( it1 && it2 ) { if ( !TestForStrands(it1.GetStrand(), it2.GetStrand()) || !it1.GetSeq_id().Equals(it2.GetSeq_id())) { return false; } if ( minus_strand ) { if (it1.GetRange().GetFrom() != it2.GetRange().GetFrom() ) { // The last interval from loc2 may be shorter than the // current interval from loc1 if (it1.GetRange().GetFrom() > it2.GetRange().GetFrom() || ++it2) { return false; } break; } } else { if (it1.GetRange().GetTo() != it2.GetRange().GetTo() ) { // The last interval from loc2 may be shorter than the // current interval from loc1 if (it1.GetRange().GetTo() < it2.GetRange().GetTo() || ++it2) { return false; } break; } } // Go to the next interval start if ( !(++it2) ) { break; } if ( !(++it1) ) { return false; // loc1 has not enough intervals } if ( minus_strand ) { if (it1.GetRange().GetTo() != it2.GetRange().GetTo()) { return false; } } else { if (it1.GetRange().GetFrom() != it2.GetRange().GetFrom()) { return false; } } } return true;}int x_TestForOverlap_MultiSeq(const CSeq_loc& loc1, const CSeq_loc& loc2, EOverlapType type){ // Special case of TestForOverlap() - multi-sequences locations switch (type) { case eOverlap_Simple: { // Find all intersecting intervals int diff = 0; for (CSeq_loc_CI li1(loc1); li1; ++li1) { for (CSeq_loc_CI li2(loc2); li2; ++li2) { if ( !li1.GetSeq_id().Equals(li2.GetSeq_id()) ) { continue; } CRange<TSeqPos> rg1 = li1.GetRange(); CRange<TSeqPos> rg2 = li2.GetRange(); if ( rg1.GetTo() >= rg2.GetFrom() && rg1.GetFrom() <= rg2.GetTo() ) { diff += abs(int(rg2.GetFrom() - rg1.GetFrom())) + abs(int(rg1.GetTo() - rg2.GetTo())); } } } return diff ? diff : -1; } case eOverlap_Contained: { // loc2 is contained in loc1 CHandleRangeMap rm1, rm2; rm1.AddLocation(loc1); rm2.AddLocation(loc2); int diff = 0; CHandleRangeMap::const_iterator it2 = rm2.begin(); for ( ; it2 != rm2.end(); ++it2) { CHandleRangeMap::const_iterator it1 = rm1.GetMap().find(it2->first); if (it1 == rm1.end()) { // loc2 has region on a sequence not present in loc1 diff = -1; break; } CRange<TSeqPos> rg1 = it1->second.GetOverlappingRange(); CRange<TSeqPos> rg2 = it2->second.GetOverlappingRange(); if ( rg1.GetFrom() <= rg2.GetFrom() && rg1.GetTo() >= rg2.GetTo() ) { diff += (rg2.GetFrom() - rg1.GetFrom()) + (rg1.GetTo() - rg2.GetTo()); } else { // Found an interval on loc2 not contained in loc1 diff = -1; break; } } return diff; } case eOverlap_Contains: { // loc2 is contained in loc1 CHandleRangeMap rm1, rm2; rm1.AddLocation(loc1); rm2.AddLocation(loc2); int diff = 0; CHandleRangeMap::const_iterator it1 = rm1.begin(); for ( ; it1 != rm1.end(); ++it1) { CHandleRangeMap::const_iterator it2 = rm2.GetMap().find(it1->first); if (it2 == rm2.end()) { // loc1 has region on a sequence not present in loc2 diff = -1; break; } CRange<TSeqPos> rg1 = it1->second.GetOverlappingRange(); CRange<TSeqPos> rg2 = it2->second.GetOverlappingRange(); if ( rg2.GetFrom() <= rg1.GetFrom() && rg2.GetTo() >= rg1.GetTo() ) { diff += (rg1.GetFrom() - rg2.GetFrom()) + (rg2.GetTo() - rg1.GetTo()); } else { // Found an interval on loc1 not contained in loc2 diff = -1; break; } } return diff; } case eOverlap_Subset: case eOverlap_CheckIntervals: case eOverlap_Interval: { // For this types the function should not be called NCBI_THROW(CObjmgrUtilException, eNotImplemented, "TestForOverlap() -- error processing multi-ID seq-loc"); } } return -1;}int TestForOverlap(const CSeq_loc& loc1, const CSeq_loc& loc2, EOverlapType type, TSeqPos circular_len){ CRange<TSeqPos> rg1, rg2; bool multi_seq = false; try { rg1 = loc1.GetTotalRange(); rg2 = loc2.GetTotalRange(); } catch (exception&) { // Can not use total range for multi-sequence locations if (type == eOverlap_Simple || type == eOverlap_Contained || type == eOverlap_Contains) { // Can not process circular multi-id locations if (circular_len != 0 && circular_len != kInvalidSeqPos) { throw; } return x_TestForOverlap_MultiSeq(loc1, loc2, type); } multi_seq = true; } ENa_strand strand1 = GetStrand(loc1); ENa_strand strand2 = GetStrand(loc2); if ( !TestForStrands(strand1, strand2) ) return -1; switch (type) { case eOverlap_Simple: { if (circular_len != kInvalidSeqPos) { TSeqPos from1 = loc1.GetStart(circular_len); TSeqPos from2 = loc2.GetStart(circular_len); TSeqPos to1 = loc1.GetEnd(circular_len); TSeqPos to2 = loc2.GetEnd(circular_len); if (from1 > to1) { if (from2 > to2) { // Both locations are circular and must intersect at 0 return abs(int(from2 - from1)) + abs(int(to1 - to2)); } else { // Only the first location is circular, rg2 may be used // for the second one. int loc_len = rg2.GetLength() + loc1.GetCircularLength(circular_len); if (from1 < rg2.GetFrom() || to1 > rg2.GetTo()) { // loc2 is completely in loc1 return loc_len - 2*rg2.GetLength(); } if (from1 < rg2.GetTo()) { return loc_len - (rg2.GetTo() - from1); } if (rg2.GetFrom() < to1) { return loc_len - (to1 - rg2.GetFrom()); } return -1; } } else if (from2 > to2) { // Only the second location is circular int loc_len = rg1.GetLength() + loc2.GetCircularLength(circular_len); if (from2 < rg1.GetFrom() || to2 > rg1.GetTo()) { // loc2 is completely in loc1 return loc_len - 2*rg1.GetLength(); } if (from2 < rg1.GetTo()) { return loc_len - (rg1.GetTo() - from2); } if (rg1.GetFrom() < to2) { return loc_len - (to2 - rg1.GetFrom()); } return -1; } // Locations are not circular, proceed to normal calculations } if ( rg1.GetTo() >= rg2.GetFrom() && rg1.GetFrom() <= rg2.GetTo() ) { return abs(int(rg2.GetFrom() - rg1.GetFrom())) + abs(int(rg1.GetTo() - rg2.GetTo())); } return -1; } case eOverlap_Contained: { if (circular_len != kInvalidSeqPos) { TSeqPos from1 = loc1.GetStart(circular_len); TSeqPos from2 = loc2.GetStart(circular_len); TSeqPos to1 = loc1.GetEnd(circular_len); TSeqPos to2 = loc2.GetEnd(circular_len); if (from1 > to1) { if (from2 > to2) { return (from1 <= from2 && to1 >= to2) ? (from2 - from1) - (to1 - to2) : -1; } else { if (rg2.GetFrom() >= from1 || rg2.GetTo() <= to1) { return loc1.GetCircularLength(circular_len) - rg2.GetLength(); } return -1; } } else if (from2 > to2) { // Non-circular location can not contain a circular one return -1; } } if ( rg1.GetFrom() <= rg2.GetFrom() && rg1.GetTo() >= rg2.GetTo() ) { return (rg2.GetFrom() - rg1.GetFrom()) + (rg1.GetTo() - rg2.GetTo()); } return -1; } case eOverlap_Contains: { if (circular_len != kInvalidSeqPos) { TSeqPos from1 = loc1.GetStart(circular_len); TSeqPos from2 = loc2.GetStart(circular_len); TSeqPos to1 = loc1.GetEnd(circular_len); TSeqPos to2 = loc2.GetEnd(circular_len); if (from1 > to1) { if (from2 > to2) { return (from2 <= from1 && to2 >= to1) ? (from1 - from2) - (to2 - to1) : -1; } else { // Non-circular location can not contain a circular one return -1; } } else if (from2 > to2) { if (rg1.GetFrom() >= from2 || rg1.GetTo() <= to2) { return loc2.GetCircularLength(circular_len) - rg1.GetLength(); } return -1; } } if ( rg2.GetFrom() <= rg1.GetFrom() && rg2.GetTo() >= rg1.GetTo() ) { return (rg1.GetFrom() - rg2.GetFrom()) + (rg2.GetTo() - rg1.GetTo()); } return -1; } case eOverlap_Subset: { // loc1 should contain loc2 if ( Compare(loc1, loc2) != eContains ) { return -1; } return GetLength(loc1) - GetLength(loc2); } case eOverlap_CheckIntervals: { if ( !multi_seq && (rg1.GetFrom() > rg2.GetTo() || rg1.GetTo() < rg2.GetTo()) ) { return -1; } // Check intervals' boundaries CSeq_loc_CI it1(loc1); CSeq_loc_CI it2(loc2); if (!it1 || !it2) { break; } // check case when strand is minus if (it2.GetStrand() == eNa_strand_minus) { // The first interval should be treated as the last one // for minuns strands. TSeqPos loc2end = it2.GetRange().GetTo(); TSeqPos loc2start = it2.GetRange().GetFrom(); // Find the first interval in loc1 intersecting with loc2 for ( ; it1 && it1.GetRange().GetTo() >= loc2start; ++it1) { if (it1.GetRange().GetTo() >= loc2end && TestForIntervals(it1, it2,
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?