📄 sequence.hpp
字号:
/* * =========================================================================== * PRODUCTION $Log: sequence.hpp,v $ * PRODUCTION Revision 1000.2 2004/06/01 19:22:17 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.44 * PRODUCTION * =========================================================================== */#ifndef SEQUENCE__HPP#define SEQUENCE__HPP/* $Id: sequence.hpp,v 1000.2 2004/06/01 19:22:17 gouriano Exp $* ===========================================================================** PUBLIC DOMAIN NOTICE* National Center for Biotechnology Information** This software/database is a "United States Government Work" under the* terms of the United States Copyright Act. It was written as part of* the author's official duties as a United States Government employee and* thus cannot be copyrighted. This software/database is freely available* to the public for use. The National Library of Medicine and the U.S.* Government have not placed any restriction on its use or reproduction.** Although all reasonable efforts have been taken to ensure the accuracy* and reliability of the software and data, the NLM and the U.S.* Government do not and cannot warrant the performance or results that* may be obtained by using this software or data. The NLM and the U.S.* Government disclaim all warranties, express or implied, including* warranties of performance, merchantability or fitness for any particular* purpose.** Please cite the author in any work or product based on this material.** ===========================================================================** Author: Clifford Clausen & Aaron Ucko** File Description:* Sequence utilities requiring CScope* Obtains or constructs a sequence's title. (Corresponds to* CreateDefLine in the C toolkit.)*/#include <corelib/ncbistd.hpp>#include <serial/serial.hpp>#include <serial/objistr.hpp>#include <serial/objostr.hpp>#include <objects/seqfeat/SeqFeatData.hpp>#include <objects/seqloc/Na_strand.hpp>#include <objects/seqloc/Seq_interval.hpp>#include <objects/seqloc/Seq_loc.hpp>#include <util/strsearch.hpp>BEGIN_NCBI_SCOPEBEGIN_SCOPE(objects)// Forward declarationsclass CSeq_id;class CSeq_loc_mix;class CSeq_point;class CPacked_seqpnt;class CScope;class CBioseq_Handle;class CSeqVector;class CCdregion;class CSeq_feat;class CSeq_entry;class CGenetic_code;BEGIN_SCOPE(sequence)struct CNotUnique : public runtime_error{ CNotUnique() : runtime_error("CSeq_ids do not refer to unique CBioseq") {}};struct CNoLength : public runtime_error{ CNoLength() : runtime_error("Unable to determine length") {}};// Containment relationships between CSeq_locsenum ECompare { eNoOverlap = 0, // CSeq_locs do not overlap eContained, // First CSeq_loc contained by second eContains, // First CSeq_loc contains second eSame, // CSeq_locs contain each other eOverlap // CSeq_locs overlap};// Get sequence length if scope not null, else return max possible TSeqPosNCBI_XOBJUTIL_EXPORTTSeqPos GetLength(const CSeq_id& id, CScope* scope = 0);// Get length of sequence represented by CSeq_loc, if possibleNCBI_XOBJUTIL_EXPORTTSeqPos GetLength(const CSeq_loc& loc, CScope* scope = 0) THROWS((sequence::CNoLength));// Get length of CSeq_loc_mix == sum (length of embedded CSeq_locs)NCBI_XOBJUTIL_EXPORTTSeqPos GetLength(const CSeq_loc_mix& mix, CScope* scope = 0) THROWS((sequence::CNoLength));// Checks that point >= 0 and point < length of BioseqNCBI_XOBJUTIL_EXPORTbool IsValid(const CSeq_point& pt, CScope* scope = 0);// Checks that all points >=0 and < length of CBioseq. If scope is 0// assumes length of CBioseq is max value of TSeqPos.NCBI_XOBJUTIL_EXPORTbool IsValid(const CPacked_seqpnt& pts, CScope* scope = 0);// Checks from and to of CSeq_interval. If from < 0, from > to, or// to >= length of CBioseq this is an interval for, returns false, else true.NCBI_XOBJUTIL_EXPORTbool IsValid(const CSeq_interval& interval, CScope* scope = 0);// Determines if two CSeq_ids represent the same CBioseqNCBI_XOBJUTIL_EXPORTbool IsSameBioseq(const CSeq_id& id1, const CSeq_id& id2, CScope* scope = 0);// Returns true if all embedded CSeq_ids represent the same CBioseq, else falseNCBI_XOBJUTIL_EXPORTbool IsOneBioseq(const CSeq_loc& loc, CScope* scope = 0);// If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns// the first CSeq_id found, else throws exception CNotUnique()NCBI_XOBJUTIL_EXPORTconst CSeq_id& GetId(const CSeq_loc& loc, CScope* scope = 0) THROWS((sequence::CNotUnique));// Returns eNa_strand_unknown if multiple Bioseqs in loc// Returns eNa_strand_other if multiple strands in same loc// Returns eNa_strand_both if loc is a Whole// Returns strand otherwiseNCBI_XOBJUTIL_EXPORTENa_strand GetStrand(const CSeq_loc& loc, CScope* scope = 0);// If only one CBioseq is represented by CSeq_loc, returns the lowest residue// position represented. If not null, scope is used to determine if two// CSeq_ids represent the same CBioseq. Throws exception CNotUnique if// CSeq_loc does not represent one CBioseq.NCBI_XOBJUTIL_EXPORTTSeqPos GetStart(const CSeq_loc& loc, CScope* scope = 0) THROWS((sequence::CNotUnique));// If only one CBioseq is represented by CSeq_loc, returns the highest residue// position represented. If not null, scope is used to determine if two// CSeq_ids represent the same CBioseq. Throws exception CNotUnique if// CSeq_loc does not represent one CBioseq.NCBI_XOBJUTIL_EXPORTTSeqPos GetStop(const CSeq_loc& loc, CScope* scope = 0) THROWS((sequence::CNotUnique));// Returns the sequence::ECompare containment relationship between CSeq_locsNCBI_XOBJUTIL_EXPORTsequence::ECompare Compare(const CSeq_loc& loc1, const CSeq_loc& loc2, CScope* scope = 0);// Get sequence's title (used in various flat-file formats.)// This function is here rather than in CBioseq because it may need// to inspect other sequences. The reconstruct flag indicates that it// should ignore any existing title Seqdesc.enum EGetTitleFlags { fGetTitle_Reconstruct = 0x1, // ignore existing title Seqdesc. fGetTitle_Organism = 0x2 // append [organism]};typedef int TGetTitleFlags;NCBI_XOBJUTIL_EXPORTstring GetTitle(const CBioseq_Handle& hnd, TGetTitleFlags flags = 0);/// Retrieve a particular seq-id from a given bioseq handle. This uses/// CSynonymsSet internally to decide which seq-id should be used.enum EGetIdType { eGetId_ForceGi, // return only a gi-based seq-id eGetId_Best, // return the "best" gi (uses FindBestScore(), // with CSeq_id::CalculateScore() as the score // function eGetId_HandleDefault, // returns the ID associated with a bioseq-handle eGetId_Default = eGetId_Best};NCBI_XOBJUTIL_EXPORTconst CSeq_id& GetId(const CBioseq_Handle& handle, EGetIdType type = eGetId_Default);// Change a CSeq_id to the one for the CBioseq that it represents// that has the best rank or worst rank according on value of best.// Just returns if scope == 0NCBI_XOBJUTIL_EXPORTvoid ChangeSeqId(CSeq_id* id, bool best, CScope* scope = 0);// Change each of the CSeq_ids embedded in a CSeq_loc to the best// or worst CSeq_id accoring to the value of best. Just returns if// scope == 0NCBI_XOBJUTIL_EXPORTvoid ChangeSeqLocId(CSeq_loc* loc, bool best, CScope* scope = 0);enum ESeqLocCheck { eSeqLocCheck_ok, eSeqLocCheck_warning, eSeqLocCheck_error};// Checks that a CSeq_loc is all on one strand on one CBioseq. For embedded // points, checks that the point location is <= length of sequence of point. // For packed points, checks that all points are within length of sequence. // For intervals, ensures from <= to and interval is within length of sequence.// If no mixed strands and lengths are valid, returns eSeqLocCheck_ok. If// only mixed strands/CBioseq error, then returns eSeqLocCheck_warning. If // length error, then returns eSeqLocCheck_error.NCBI_XOBJUTIL_EXPORTESeqLocCheck SeqLocCheck(const CSeq_loc& loc, CScope* scope);// Returns true if the order of Seq_locs is bad, otherwise, falseNCBI_XOBJUTIL_EXPORTbool BadSeqLocSortOrder(const CBioseq& seq, const CSeq_loc& loc, CScope* scope);enum ES2PFlags { fS2P_NoMerge = 0x1, // don't merge adjacent intervals on the product fS2P_AllowTer = 0x2 // map the termination codon as a legal location};typedef int TS2PFlags; // binary OR of ES2PFlagsNCBI_XOBJUTIL_EXPORTCRef<CSeq_loc> SourceToProduct(const CSeq_feat& feat, const CSeq_loc& source_loc, TS2PFlags flags = 0, CScope* scope = 0, int* frame = 0);enum EP2SFlags { fP2S_Extend = 0x1 // if hitting ends, extend to include partial codons};typedef int TP2SFlags; // binary OR of ES2PFlagsNCBI_XOBJUTIL_EXPORTCRef<CSeq_loc> ProductToSource(const CSeq_feat& feat, const CSeq_loc& prod_loc, TP2SFlags flags = 0, CScope* scope = 0);enum EOffsetType { // For positive-orientation strands, start = left and end = right; // for reverse-orientation strands, start = right and end = left. eOffset_FromStart, // relative to beginning of location eOffset_FromEnd, // relative to end of location eOffset_FromLeft, // relative to low-numbered end eOffset_FromRight // relative to high-numbered end};// returns (TSeqPos)-1 if the locations don't overlapNCBI_XOBJUTIL_EXPORTTSeqPos LocationOffset(const CSeq_loc& outer, const CSeq_loc& inner, EOffsetType how = eOffset_FromStart, CScope* scope = 0);enum EOverlapType { eOverlap_Simple, // any overlap of extremes eOverlap_Contained, // 2nd contained within 1st extremes eOverlap_Contains, // 2nd contains 1st extremes eOverlap_Subset, // 2nd is a subset of 1st ranges eOverlap_CheckIntervals, // 2nd is a subset of 1st with matching boundaries eOverlap_Interval // at least one pair of intervals must overlap};// Check if the two locations have ovarlap of a given typeNCBI_XOBJUTIL_EXPORTint TestForOverlap(const CSeq_loc& loc1, const CSeq_loc& loc2, EOverlapType type, TSeqPos circular_len = kInvalidSeqPos);NCBI_XOBJUTIL_EXPORTCConstRef<CSeq_feat> GetBestOverlappingFeat(const CSeq_loc& loc, CSeqFeatData::E_Choice feat_type, EOverlapType overlap_type, CScope& scope);NCBI_XOBJUTIL_EXPORTCConstRef<CSeq_feat> GetBestOverlappingFeat(const CSeq_loc& loc, CSeqFeatData::ESubtype feat_type, EOverlapType overlap_type, CScope& scope);// Convenience functions for popular overlapping typesNCBI_XOBJUTIL_EXPORTCConstRef<CSeq_feat> GetOverlappingGene(const CSeq_loc& loc, CScope& scope);NCBI_XOBJUTIL_EXPORTCConstRef<CSeq_feat> GetOverlappingmRNA(const CSeq_loc& loc, CScope& scope);NCBI_XOBJUTIL_EXPORTCConstRef<CSeq_feat> GetOverlappingCDS(const CSeq_loc& loc, CScope& scope);NCBI_XOBJUTIL_EXPORTCConstRef<CSeq_feat> GetOverlappingPub(const CSeq_loc& loc, CScope& scope);NCBI_XOBJUTIL_EXPORTCConstRef<CSeq_feat> GetOverlappingSource(const CSeq_loc& loc, CScope& scope);NCBI_XOBJUTIL_EXPORTCConstRef<CSeq_feat> GetOverlappingOperon(const CSeq_loc& loc, CScope& scope);enum ESeqlocPartial { eSeqlocPartial_Complete = 0, eSeqlocPartial_Start = 1, eSeqlocPartial_Stop = 2, eSeqlocPartial_Internal = 4, eSeqlocPartial_Other = 8, eSeqlocPartial_Nostart = 16, eSeqlocPartial_Nostop = 32, eSeqlocPartial_Nointernal = 64, eSeqlocPartial_Limwrong = 128, eSeqlocPartial_Haderror = 256}; // Sets bits for incomplete location and/or errorsNCBI_XOBJUTIL_EXPORTint SeqLocPartialCheck(const CSeq_loc& loc, CScope* scope);enum ESeqLocFlags{ fMergeIntervals = 1, // merge overlapping intervals fFuseAbutting = 2, // fuse together abutting intervals fSingleInterval = 4, // create a single interval fAddNulls = 8 // will add a null Seq-loc between intervals };typedef unsigned int TSeqLocFlags; // logical OR of ESeqLocFlags// Merge two Seq-locs returning the merged location.NCBI_XOBJUTIL_EXPORTCSeq_loc* SeqLocMerge(const CBioseq_Handle& target, const CSeq_loc& loc1, const CSeq_loc& loc2, TSeqLocFlags flags = 0);// Merge a single Seq-locNCBI_XOBJUTIL_EXPORTCSeq_loc* SeqLocMergeOne(const CBioseq_Handle& target, const CSeq_loc& loc, TSeqLocFlags flags = 0);// Merge a set of locations, returning the result.template<typename LocContainer>CSeq_loc* SeqLocMerge(const CBioseq_Handle& target, LocContainer& locs, TSeqLocFlags flags = 0){ // create a single Seq-loc holding all the locations CSeq_loc temp; ITERATE( typename LocContainer, it, locs ) { temp.Add(**it); } return SeqLocMergeOne(target, temp, flags);}NCBI_XOBJUTIL_EXPORTCSeq_loc* SeqLocRevCmp(const CSeq_loc& loc, CScope* scope = 0);// Get the encoding CDS feature of a given protein sequence.NCBI_XOBJUTIL_EXPORTconst CSeq_feat* GetCDSForProduct(const CBioseq& product, CScope* scope);NCBI_XOBJUTIL_EXPORTconst CSeq_feat* GetCDSForProduct(const CBioseq_Handle& product);// Get the mature peptide feature of a proteinNCBI_XOBJUTIL_EXPORTconst CSeq_feat* GetPROTForProduct(const CBioseq& product, CScope* scope);NCBI_XOBJUTIL_EXPORTconst CSeq_feat* GetPROTForProduct(const CBioseq_Handle& product);// Get the encoding mRNA feature of a given mRNA (cDNA) bioseq.NCBI_XOBJUTIL_EXPORTconst CSeq_feat* GetmRNAForProduct(const CBioseq& product, CScope* scope);NCBI_XOBJUTIL_EXPORTconst CSeq_feat* GetmRNAForProduct(const CBioseq_Handle& product);// Get the encoding nucleotide sequnce of a protein.NCBI_XOBJUTIL_EXPORTconst CBioseq* GetNucleotideParent(const CBioseq& product, CScope* scope);NCBI_XOBJUTIL_EXPORTCBioseq_Handle GetNucleotideParent(const CBioseq_Handle& product);// return the org-ref associated with a given sequence. This will throw// a CException if there is no org-ref associated with the sequenceNCBI_XOBJUTIL_EXPORTconst COrg_ref& GetOrg_ref(const CBioseq_Handle& handle);// return the tax-id associated with a given sequence. This will return 0// if no tax-id can be found.NCBI_XOBJUTIL_EXPORTint GetTaxId(const CBioseq_Handle& handle);END_SCOPE(sequence)// FASTA-format output; see also ReadFasta in <objtools/readers/fasta.hpp>class NCBI_XOBJUTIL_EXPORT CFastaOstream {public:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -