sequence.cpp

来自「ncbi源码」· C++ 代码 · 共 2,155 行 · 第 1/5 页

CPP
2,155
字号
/* * =========================================================================== * PRODUCTION $Log: sequence.cpp,v $ * PRODUCTION Revision 1000.3  2004/06/01 19:25:30  gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.82 * PRODUCTION * =========================================================================== *//*  $Id: sequence.cpp,v 1000.3 2004/06/01 19:25:30 gouriano Exp $* ===========================================================================**                            PUBLIC DOMAIN NOTICE*               National Center for Biotechnology Information**  This software/database is a "United States Government Work" under the*  terms of the United States Copyright Act.  It was written as part of*  the author's official duties as a United States Government employee and*  thus cannot be copyrighted.  This software/database is freely available*  to the public for use. The National Library of Medicine and the U.S.*  Government have not placed any restriction on its use or reproduction.**  Although all reasonable efforts have been taken to ensure the accuracy*  and reliability of the software and data, the NLM and the U.S.*  Government do not and cannot warrant the performance or results that*  may be obtained by using this software or data. The NLM and the U.S.*  Government disclaim all warranties, express or implied, including*  warranties of performance, merchantability or fitness for any particular*  purpose.**  Please cite the author in any work or product based on this material.** ===========================================================================** Author:  Clifford Clausen** File Description:*   Sequence utilities requiring CScope*/#include <ncbi_pch.hpp>#include <serial/iterator.hpp>#include <objmgr/object_manager.hpp>#include <objmgr/scope.hpp>#include <objmgr/seq_vector.hpp>#include <objmgr/seq_vector_ci.hpp>#include <objmgr/seqdesc_ci.hpp>#include <objmgr/feat_ci.hpp>#include <objmgr/impl/handle_range_map.hpp>#include <objmgr/impl/synonyms.hpp>#include <objects/general/Int_fuzz.hpp>#include <objects/seq/Bioseq.hpp>#include <objects/seq/Delta_ext.hpp>#include <objects/seq/Delta_seq.hpp>#include <objects/seq/MolInfo.hpp>#include <objects/seq/Seg_ext.hpp>#include <objects/seq/Seq_ext.hpp>#include <objects/seq/Seq_inst.hpp>#include <objects/seq/Seq_literal.hpp>#include <objects/seqloc/Packed_seqpnt.hpp>#include <objects/seqloc/Seq_bond.hpp>#include <objects/seqloc/Seq_id.hpp>#include <objects/seqloc/Seq_interval.hpp>#include <objects/seqloc/Seq_loc.hpp>#include <objects/seqloc/Seq_loc_equiv.hpp>#include <objects/seqloc/Seq_loc_mix.hpp>#include <objects/seqloc/Seq_point.hpp>#include <objects/seqset/Seq_entry.hpp>#include <objects/seqfeat/Org_ref.hpp>#include <objects/seqfeat/BioSource.hpp>#include <objects/seqfeat/Cdregion.hpp>#include <objects/seqfeat/Code_break.hpp>#include <objects/seqfeat/Genetic_code.hpp>#include <objects/seqfeat/Genetic_code_table.hpp>#include <objects/seqfeat/Seq_feat.hpp>#include <objmgr/util/sequence.hpp>#include <util/strsearch.hpp>#include <list>#include <algorithm>BEGIN_NCBI_SCOPEBEGIN_SCOPE(objects)BEGIN_SCOPE(sequence)const COrg_ref& GetOrg_ref(const CBioseq_Handle& handle){    {{        CSeqdesc_CI desc(handle, CSeqdesc::e_Source);        if (desc) {            return desc->GetSource().GetOrg();        }    }}    {{        CSeqdesc_CI desc(handle, CSeqdesc::e_Org);        if (desc) {            return desc->GetOrg();        }    }}    NCBI_THROW(CException, eUnknown, "No organism set");}int GetTaxId(const CBioseq_Handle& handle){    try {        return GetOrg_ref(handle).GetTaxId();    }    catch (...) {        return 0;    }}TSeqPos GetLength(const CSeq_id& id, CScope* scope){    if ( !scope ) {        return numeric_limits<TSeqPos>::max();    }    CBioseq_Handle hnd = scope->GetBioseqHandle(id);    if ( !hnd ) {        return numeric_limits<TSeqPos>::max();    }    CBioseq_Handle::TBioseqCore core = hnd.GetBioseqCore();    return core->GetInst().IsSetLength() ? core->GetInst().GetLength() :        numeric_limits<TSeqPos>::max();}TSeqPos GetLength(const CSeq_loc& loc, CScope* scope)    THROWS((CNoLength)){    switch (loc.Which()) {    case CSeq_loc::e_Pnt:        return 1;    case CSeq_loc::e_Int:        return loc.GetInt().GetLength();    case CSeq_loc::e_Null:    case CSeq_loc::e_Empty:        return 0;    case CSeq_loc::e_Whole:        return GetLength(loc.GetWhole(), scope);    case CSeq_loc::e_Packed_int:        return loc.GetPacked_int().GetLength();    case CSeq_loc::e_Mix:        return GetLength(loc.GetMix(), scope);    case CSeq_loc::e_Packed_pnt:   // just a bunch of points        return loc.GetPacked_pnt().GetPoints().size();    case CSeq_loc::e_not_set:    case CSeq_loc::e_Bond:         //can't calculate length    case CSeq_loc::e_Feat:    case CSeq_loc::e_Equiv:        // unless actually the same length...    default:        THROW0_TRACE(CNoLength());    }}TSeqPos GetLength(const CSeq_loc_mix& mix, CScope* scope)    THROWS((CNoLength)){    TSeqPos length = 0;    ITERATE( CSeq_loc_mix::Tdata, i, mix.Get() ) {        TSeqPos ret = GetLength((**i), scope);        length += ret;    }    return length;}bool IsValid(const CSeq_point& pt, CScope* scope){    if (static_cast<TSeqPos>(pt.GetPoint()) >=         GetLength(pt.GetId(), scope) )    {        return false;    }    return true;}bool IsValid(const CPacked_seqpnt& pts, CScope* scope){    typedef CPacked_seqpnt::TPoints TPoints;    TSeqPos length = GetLength(pts.GetId(), scope);    ITERATE (TPoints, it, pts.GetPoints()) {        if (*it >= length) {            return false;        }    }    return true;}bool IsValid(const CSeq_interval& interval, CScope* scope){    if (interval.GetFrom() > interval.GetTo() ||        interval.GetTo() >= GetLength(interval.GetId(), scope))    {        return false;    }    return true;}bool IsSameBioseq (const CSeq_id& id1, const CSeq_id& id2, CScope* scope){    // Compare CSeq_ids directly    if (id1.Compare(id2) == CSeq_id::e_YES) {        return true;    }    // Compare handles    if ( scope ) {        try {            CBioseq_Handle hnd1 = scope->GetBioseqHandle(id1);            CBioseq_Handle hnd2 = scope->GetBioseqHandle(id2);            return hnd1  &&  hnd2  &&  (hnd1 == hnd2);        } catch (exception& e) {            ERR_POST(e.what() << ": CSeq_id1: " << id1.DumpAsFasta()                     << ": CSeq_id2: " << id2.DumpAsFasta());            return false;        }    }    return false;}bool IsOneBioseq(const CSeq_loc& loc, CScope* scope){    try {        GetId(loc, scope);        return true;    } catch (CNotUnique&) {        return false;    }}class CSeqIdFromHandleException : EXCEPTION_VIRTUAL_BASE public CException{public:    // Enumerated list of document management errors    enum EErrCode {        eNoSynonyms,        eRequestedIdNotFound    };    // Translate the specific error code into a string representations of    // that error code.    virtual const char* GetErrCodeString(void) const    {        switch (GetErrCode()) {        case eNoSynonyms:           return "eNoSynonyms";        case eRequestedIdNotFound:  return "eRequestedIdNotFound";        default:                    return CException::GetErrCodeString();        }    }    NCBI_EXCEPTION_DEFAULT(CSeqIdFromHandleException, CException);};const CSeq_id& GetId(const CBioseq_Handle& handle,                     EGetIdType type){    switch (type) {    case eGetId_HandleDefault:        return *handle.GetSeqId();    case eGetId_ForceGi:        if (handle.GetSeqId().GetPointer()  &&  handle.GetSeqId()->IsGi()) {            return *handle.GetSeqId();        }        {{            CConstRef<CSynonymsSet> syns =                handle.GetScope().GetSynonyms(*handle.GetSeqId());            if ( !syns ) {                string msg("No synonyms found for sequence ");                handle.GetSeqId()->GetLabel(&msg);                NCBI_THROW(CSeqIdFromHandleException, eNoSynonyms, msg);            }            ITERATE (CSynonymsSet, iter, *syns) {                CSeq_id_Handle idh = CSynonymsSet::GetSeq_id_Handle(iter);                if (idh.GetSeqId()->IsGi()) {                    return *idh.GetSeqId();                }            }        }}        break;    case eGetId_Best:        {{            CConstRef<CSynonymsSet> syns =                handle.GetScope().GetSynonyms(handle.GetSeq_id_Handle());            if ( !syns ) {                string msg("No synonyms found for sequence ");                handle.GetSeqId()->GetLabel(&msg);                NCBI_THROW(CSeqIdFromHandleException, eNoSynonyms, msg);            }            list< CRef<CSeq_id> > ids;            ITERATE (CSynonymsSet, iter, *syns) {                CSeq_id_Handle idh = CSynonymsSet::GetSeq_id_Handle(iter);                ids.push_back                    (CRef<CSeq_id>(const_cast<CSeq_id*>(idh.GetSeqId().GetPointer())));            }            CConstRef<CSeq_id> best_id = FindBestChoice(ids, CSeq_id::Score);            if (best_id) {                return *best_id;            }        }}        break;    default:        NCBI_THROW(CSeqIdFromHandleException, eRequestedIdNotFound,                   "Unhandled seq-id type");        break;    }    NCBI_THROW(CSeqIdFromHandleException, eRequestedIdNotFound,               "No best seq-id could be found");}const CSeq_id& GetId(const CSeq_loc& loc, CScope* scope)    THROWS((CNotUnique)){    CTypeConstIterator<CSeq_id> cur = ConstBegin(loc);    CTypeConstIterator<CSeq_id> first = ConstBegin(loc);    if (!first) {        THROW0_TRACE(CNotUnique());    }    for (++cur; cur; ++cur) {        if ( !IsSameBioseq(*cur, *first, scope) ) {            THROW0_TRACE(CNotUnique());        }    }    return *first;}inlinestatic ENa_strand s_GetStrand(const CSeq_loc& loc){    switch (loc.Which()) {    case CSeq_loc::e_Bond:        {            const CSeq_bond& bond = loc.GetBond();            ENa_strand a_strand = bond.GetA().IsSetStrand() ?                bond.GetA().GetStrand() : eNa_strand_unknown;            ENa_strand b_strand = eNa_strand_unknown;            if ( bond.IsSetB() ) {                b_strand = bond.GetB().IsSetStrand() ?                    bond.GetB().GetStrand() : eNa_strand_unknown;            }            if ( a_strand == eNa_strand_unknown  &&                 b_strand != eNa_strand_unknown ) {                a_strand = b_strand;            } else if ( a_strand != eNa_strand_unknown  &&                        b_strand == eNa_strand_unknown ) {                b_strand = a_strand;            }            return (a_strand != b_strand) ? eNa_strand_other : a_strand;        }    case CSeq_loc::e_Whole:        return eNa_strand_both;    case CSeq_loc::e_Int:        return loc.GetInt().IsSetStrand() ? loc.GetInt().GetStrand() :            eNa_strand_unknown;    case CSeq_loc::e_Pnt:        return loc.GetPnt().IsSetStrand() ? loc.GetPnt().GetStrand() :            eNa_strand_unknown;    case CSeq_loc::e_Packed_pnt:        return loc.GetPacked_pnt().IsSetStrand() ?            loc.GetPacked_pnt().GetStrand() : eNa_strand_unknown;    case CSeq_loc::e_Packed_int:    {        ENa_strand strand = eNa_strand_unknown;        bool strand_set = false;        ITERATE(CPacked_seqint::Tdata, i, loc.GetPacked_int().Get()) {            ENa_strand istrand = (*i)->IsSetStrand() ? (*i)->GetStrand() :                eNa_strand_unknown;            if (strand == eNa_strand_unknown  &&  istrand == eNa_strand_plus) {                strand = eNa_strand_plus;                strand_set = true;            } else if (strand == eNa_strand_plus  &&                istrand == eNa_strand_unknown) {                istrand = eNa_strand_plus;                strand_set = true;            } else if (!strand_set) {                strand = istrand;                strand_set = true;            } else if (istrand != strand) {                return eNa_strand_other;            }        }        return strand;    }    case CSeq_loc::e_Mix:    {        ENa_strand strand = eNa_strand_unknown;        bool strand_set = false;        ITERATE(CSeq_loc_mix::Tdata, it, loc.GetMix().Get()) {            ENa_strand istrand = GetStrand(**it);            if (strand == eNa_strand_unknown  &&  istrand == eNa_strand_plus) {                strand = eNa_strand_plus;                strand_set = true;            } else if (strand == eNa_strand_plus  &&                istrand == eNa_strand_unknown) {                istrand = eNa_strand_plus;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?