sequence.cpp
来自「ncbi源码」· C++ 代码 · 共 2,155 行 · 第 1/5 页
CPP
2,155 行
/* * =========================================================================== * PRODUCTION $Log: sequence.cpp,v $ * PRODUCTION Revision 1000.3 2004/06/01 19:25:30 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.82 * PRODUCTION * =========================================================================== *//* $Id: sequence.cpp,v 1000.3 2004/06/01 19:25:30 gouriano Exp $* ===========================================================================** PUBLIC DOMAIN NOTICE* National Center for Biotechnology Information** This software/database is a "United States Government Work" under the* terms of the United States Copyright Act. It was written as part of* the author's official duties as a United States Government employee and* thus cannot be copyrighted. This software/database is freely available* to the public for use. The National Library of Medicine and the U.S.* Government have not placed any restriction on its use or reproduction.** Although all reasonable efforts have been taken to ensure the accuracy* and reliability of the software and data, the NLM and the U.S.* Government do not and cannot warrant the performance or results that* may be obtained by using this software or data. The NLM and the U.S.* Government disclaim all warranties, express or implied, including* warranties of performance, merchantability or fitness for any particular* purpose.** Please cite the author in any work or product based on this material.** ===========================================================================** Author: Clifford Clausen** File Description:* Sequence utilities requiring CScope*/#include <ncbi_pch.hpp>#include <serial/iterator.hpp>#include <objmgr/object_manager.hpp>#include <objmgr/scope.hpp>#include <objmgr/seq_vector.hpp>#include <objmgr/seq_vector_ci.hpp>#include <objmgr/seqdesc_ci.hpp>#include <objmgr/feat_ci.hpp>#include <objmgr/impl/handle_range_map.hpp>#include <objmgr/impl/synonyms.hpp>#include <objects/general/Int_fuzz.hpp>#include <objects/seq/Bioseq.hpp>#include <objects/seq/Delta_ext.hpp>#include <objects/seq/Delta_seq.hpp>#include <objects/seq/MolInfo.hpp>#include <objects/seq/Seg_ext.hpp>#include <objects/seq/Seq_ext.hpp>#include <objects/seq/Seq_inst.hpp>#include <objects/seq/Seq_literal.hpp>#include <objects/seqloc/Packed_seqpnt.hpp>#include <objects/seqloc/Seq_bond.hpp>#include <objects/seqloc/Seq_id.hpp>#include <objects/seqloc/Seq_interval.hpp>#include <objects/seqloc/Seq_loc.hpp>#include <objects/seqloc/Seq_loc_equiv.hpp>#include <objects/seqloc/Seq_loc_mix.hpp>#include <objects/seqloc/Seq_point.hpp>#include <objects/seqset/Seq_entry.hpp>#include <objects/seqfeat/Org_ref.hpp>#include <objects/seqfeat/BioSource.hpp>#include <objects/seqfeat/Cdregion.hpp>#include <objects/seqfeat/Code_break.hpp>#include <objects/seqfeat/Genetic_code.hpp>#include <objects/seqfeat/Genetic_code_table.hpp>#include <objects/seqfeat/Seq_feat.hpp>#include <objmgr/util/sequence.hpp>#include <util/strsearch.hpp>#include <list>#include <algorithm>BEGIN_NCBI_SCOPEBEGIN_SCOPE(objects)BEGIN_SCOPE(sequence)const COrg_ref& GetOrg_ref(const CBioseq_Handle& handle){ {{ CSeqdesc_CI desc(handle, CSeqdesc::e_Source); if (desc) { return desc->GetSource().GetOrg(); } }} {{ CSeqdesc_CI desc(handle, CSeqdesc::e_Org); if (desc) { return desc->GetOrg(); } }} NCBI_THROW(CException, eUnknown, "No organism set");}int GetTaxId(const CBioseq_Handle& handle){ try { return GetOrg_ref(handle).GetTaxId(); } catch (...) { return 0; }}TSeqPos GetLength(const CSeq_id& id, CScope* scope){ if ( !scope ) { return numeric_limits<TSeqPos>::max(); } CBioseq_Handle hnd = scope->GetBioseqHandle(id); if ( !hnd ) { return numeric_limits<TSeqPos>::max(); } CBioseq_Handle::TBioseqCore core = hnd.GetBioseqCore(); return core->GetInst().IsSetLength() ? core->GetInst().GetLength() : numeric_limits<TSeqPos>::max();}TSeqPos GetLength(const CSeq_loc& loc, CScope* scope) THROWS((CNoLength)){ switch (loc.Which()) { case CSeq_loc::e_Pnt: return 1; case CSeq_loc::e_Int: return loc.GetInt().GetLength(); case CSeq_loc::e_Null: case CSeq_loc::e_Empty: return 0; case CSeq_loc::e_Whole: return GetLength(loc.GetWhole(), scope); case CSeq_loc::e_Packed_int: return loc.GetPacked_int().GetLength(); case CSeq_loc::e_Mix: return GetLength(loc.GetMix(), scope); case CSeq_loc::e_Packed_pnt: // just a bunch of points return loc.GetPacked_pnt().GetPoints().size(); case CSeq_loc::e_not_set: case CSeq_loc::e_Bond: //can't calculate length case CSeq_loc::e_Feat: case CSeq_loc::e_Equiv: // unless actually the same length... default: THROW0_TRACE(CNoLength()); }}TSeqPos GetLength(const CSeq_loc_mix& mix, CScope* scope) THROWS((CNoLength)){ TSeqPos length = 0; ITERATE( CSeq_loc_mix::Tdata, i, mix.Get() ) { TSeqPos ret = GetLength((**i), scope); length += ret; } return length;}bool IsValid(const CSeq_point& pt, CScope* scope){ if (static_cast<TSeqPos>(pt.GetPoint()) >= GetLength(pt.GetId(), scope) ) { return false; } return true;}bool IsValid(const CPacked_seqpnt& pts, CScope* scope){ typedef CPacked_seqpnt::TPoints TPoints; TSeqPos length = GetLength(pts.GetId(), scope); ITERATE (TPoints, it, pts.GetPoints()) { if (*it >= length) { return false; } } return true;}bool IsValid(const CSeq_interval& interval, CScope* scope){ if (interval.GetFrom() > interval.GetTo() || interval.GetTo() >= GetLength(interval.GetId(), scope)) { return false; } return true;}bool IsSameBioseq (const CSeq_id& id1, const CSeq_id& id2, CScope* scope){ // Compare CSeq_ids directly if (id1.Compare(id2) == CSeq_id::e_YES) { return true; } // Compare handles if ( scope ) { try { CBioseq_Handle hnd1 = scope->GetBioseqHandle(id1); CBioseq_Handle hnd2 = scope->GetBioseqHandle(id2); return hnd1 && hnd2 && (hnd1 == hnd2); } catch (exception& e) { ERR_POST(e.what() << ": CSeq_id1: " << id1.DumpAsFasta() << ": CSeq_id2: " << id2.DumpAsFasta()); return false; } } return false;}bool IsOneBioseq(const CSeq_loc& loc, CScope* scope){ try { GetId(loc, scope); return true; } catch (CNotUnique&) { return false; }}class CSeqIdFromHandleException : EXCEPTION_VIRTUAL_BASE public CException{public: // Enumerated list of document management errors enum EErrCode { eNoSynonyms, eRequestedIdNotFound }; // Translate the specific error code into a string representations of // that error code. virtual const char* GetErrCodeString(void) const { switch (GetErrCode()) { case eNoSynonyms: return "eNoSynonyms"; case eRequestedIdNotFound: return "eRequestedIdNotFound"; default: return CException::GetErrCodeString(); } } NCBI_EXCEPTION_DEFAULT(CSeqIdFromHandleException, CException);};const CSeq_id& GetId(const CBioseq_Handle& handle, EGetIdType type){ switch (type) { case eGetId_HandleDefault: return *handle.GetSeqId(); case eGetId_ForceGi: if (handle.GetSeqId().GetPointer() && handle.GetSeqId()->IsGi()) { return *handle.GetSeqId(); } {{ CConstRef<CSynonymsSet> syns = handle.GetScope().GetSynonyms(*handle.GetSeqId()); if ( !syns ) { string msg("No synonyms found for sequence "); handle.GetSeqId()->GetLabel(&msg); NCBI_THROW(CSeqIdFromHandleException, eNoSynonyms, msg); } ITERATE (CSynonymsSet, iter, *syns) { CSeq_id_Handle idh = CSynonymsSet::GetSeq_id_Handle(iter); if (idh.GetSeqId()->IsGi()) { return *idh.GetSeqId(); } } }} break; case eGetId_Best: {{ CConstRef<CSynonymsSet> syns = handle.GetScope().GetSynonyms(handle.GetSeq_id_Handle()); if ( !syns ) { string msg("No synonyms found for sequence "); handle.GetSeqId()->GetLabel(&msg); NCBI_THROW(CSeqIdFromHandleException, eNoSynonyms, msg); } list< CRef<CSeq_id> > ids; ITERATE (CSynonymsSet, iter, *syns) { CSeq_id_Handle idh = CSynonymsSet::GetSeq_id_Handle(iter); ids.push_back (CRef<CSeq_id>(const_cast<CSeq_id*>(idh.GetSeqId().GetPointer()))); } CConstRef<CSeq_id> best_id = FindBestChoice(ids, CSeq_id::Score); if (best_id) { return *best_id; } }} break; default: NCBI_THROW(CSeqIdFromHandleException, eRequestedIdNotFound, "Unhandled seq-id type"); break; } NCBI_THROW(CSeqIdFromHandleException, eRequestedIdNotFound, "No best seq-id could be found");}const CSeq_id& GetId(const CSeq_loc& loc, CScope* scope) THROWS((CNotUnique)){ CTypeConstIterator<CSeq_id> cur = ConstBegin(loc); CTypeConstIterator<CSeq_id> first = ConstBegin(loc); if (!first) { THROW0_TRACE(CNotUnique()); } for (++cur; cur; ++cur) { if ( !IsSameBioseq(*cur, *first, scope) ) { THROW0_TRACE(CNotUnique()); } } return *first;}inlinestatic ENa_strand s_GetStrand(const CSeq_loc& loc){ switch (loc.Which()) { case CSeq_loc::e_Bond: { const CSeq_bond& bond = loc.GetBond(); ENa_strand a_strand = bond.GetA().IsSetStrand() ? bond.GetA().GetStrand() : eNa_strand_unknown; ENa_strand b_strand = eNa_strand_unknown; if ( bond.IsSetB() ) { b_strand = bond.GetB().IsSetStrand() ? bond.GetB().GetStrand() : eNa_strand_unknown; } if ( a_strand == eNa_strand_unknown && b_strand != eNa_strand_unknown ) { a_strand = b_strand; } else if ( a_strand != eNa_strand_unknown && b_strand == eNa_strand_unknown ) { b_strand = a_strand; } return (a_strand != b_strand) ? eNa_strand_other : a_strand; } case CSeq_loc::e_Whole: return eNa_strand_both; case CSeq_loc::e_Int: return loc.GetInt().IsSetStrand() ? loc.GetInt().GetStrand() : eNa_strand_unknown; case CSeq_loc::e_Pnt: return loc.GetPnt().IsSetStrand() ? loc.GetPnt().GetStrand() : eNa_strand_unknown; case CSeq_loc::e_Packed_pnt: return loc.GetPacked_pnt().IsSetStrand() ? loc.GetPacked_pnt().GetStrand() : eNa_strand_unknown; case CSeq_loc::e_Packed_int: { ENa_strand strand = eNa_strand_unknown; bool strand_set = false; ITERATE(CPacked_seqint::Tdata, i, loc.GetPacked_int().Get()) { ENa_strand istrand = (*i)->IsSetStrand() ? (*i)->GetStrand() : eNa_strand_unknown; if (strand == eNa_strand_unknown && istrand == eNa_strand_plus) { strand = eNa_strand_plus; strand_set = true; } else if (strand == eNa_strand_plus && istrand == eNa_strand_unknown) { istrand = eNa_strand_plus; strand_set = true; } else if (!strand_set) { strand = istrand; strand_set = true; } else if (istrand != strand) { return eNa_strand_other; } } return strand; } case CSeq_loc::e_Mix: { ENa_strand strand = eNa_strand_unknown; bool strand_set = false; ITERATE(CSeq_loc_mix::Tdata, it, loc.GetMix().Get()) { ENa_strand istrand = GetStrand(**it); if (strand == eNa_strand_unknown && istrand == eNa_strand_plus) { strand = eNa_strand_plus; strand_set = true; } else if (strand == eNa_strand_plus && istrand == eNa_strand_unknown) { istrand = eNa_strand_plus;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?