📄 seq_loc_mapper.hpp
字号:
/* * =========================================================================== * PRODUCTION $Log: seq_loc_mapper.hpp,v $ * PRODUCTION Revision 1000.1 2004/06/01 19:21:41 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.13 * PRODUCTION * =========================================================================== */#ifndef SEQ_LOC_MAPPER__HPP#define SEQ_LOC_MAPPER__HPP/* $Id: seq_loc_mapper.hpp,v 1000.1 2004/06/01 19:21:41 gouriano Exp $* ===========================================================================** PUBLIC DOMAIN NOTICE* National Center for Biotechnology Information** This software/database is a "United States Government Work" under the* terms of the United States Copyright Act. It was written as part of* the author's official duties as a United States Government employee and* thus cannot be copyrighted. This software/database is freely available* to the public for use. The National Library of Medicine and the U.S.* Government have not placed any restriction on its use or reproduction.** Although all reasonable efforts have been taken to ensure the accuracy* and reliability of the software and data, the NLM and the U.S.* Government do not and cannot warrant the performance or results that* may be obtained by using this software or data. The NLM and the U.S.* Government disclaim all warranties, express or implied, including* warranties of performance, merchantability or fitness for any particular* purpose.** Please cite the author in any work or product based on this material.** ===========================================================================** Author: Aleksey Grichenko** File Description:* Seq-loc mapper**/#include <corelib/ncbistd.hpp>#include <corelib/ncbiobj.hpp>#include <util/range.hpp>#include <util/rangemap.hpp>#include <objects/seqloc/Na_strand.hpp>#include <objects/seqalign/Seq_align.hpp>#include <objmgr/seq_id_handle.hpp>#include <objects/general/Int_fuzz.hpp>BEGIN_NCBI_SCOPEBEGIN_SCOPE(objects)class CSeq_id;class CSeq_loc;class CSeq_loc_CI;class CSeq_feat;class CSeq_align;class CScope;class CBioseq_Handle;class CSeqMap;class CMappingRange : public CObject{public: CMappingRange(CSeq_id_Handle src_id, TSeqPos src_from, TSeqPos src_length, ENa_strand src_strand, CSeq_id_Handle dst_id, TSeqPos dst_from, ENa_strand dst_strand); bool GoodSrcId(const CSeq_id& id) const; CRef<CSeq_id> GetDstId(void); typedef CRange<TSeqPos> TRange; typedef CRef<CInt_fuzz> TFuzz; typedef pair<TFuzz, TFuzz> TRangeFuzz; bool CanMap(TSeqPos from, TSeqPos to, bool is_set_strand, ENa_strand strand) const; TSeqPos Map_Pos(TSeqPos pos) const; TRange Map_Range(TSeqPos from, TSeqPos to) const; bool Map_Strand(bool is_set_strand, ENa_strand src, ENa_strand* dst) const; TRangeFuzz Map_Fuzz(TRangeFuzz& fuzz) const;private: CInt_fuzz::ELim x_ReverseFuzzLim(CInt_fuzz::ELim lim) const; CSeq_id_Handle m_Src_id_Handle; TSeqPos m_Src_from; TSeqPos m_Src_to; ENa_strand m_Src_strand; CSeq_id_Handle m_Dst_id_Handle; TSeqPos m_Dst_from; ENa_strand m_Dst_strand; bool m_Reverse; friend class CSeq_loc_Mapper; friend class CSeq_align_Mapper; friend struct CMappingRangeRef_Less;};class NCBI_XOBJMGR_EXPORT CSeq_loc_Mapper : public CObject{public: enum EFeatMapDirection { eLocationToProduct, eProductToLocation }; // Mapping through a feature, both location and product must be set. // If scope is set, synonyms are resolved for each source ID. CSeq_loc_Mapper(const CSeq_feat& map_feat, EFeatMapDirection dir, CScope* scope = 0); // Mapping between two seq_locs. If scope is set, synonyms are resolved // for each source ID. CSeq_loc_Mapper(const CSeq_loc& source, const CSeq_loc& target, CScope* scope = 0); // Mapping through an alignment. Need to specify target ID or // target row of the alignment. Any other ID is mapped to the // target one. If scope is set, synonyms are resolved for each source ID. // Only the first row matching target ID is used, all other rows // are considered source. CSeq_loc_Mapper(const CSeq_align& map_align, const CSeq_id& to_id, CScope* scope = 0); CSeq_loc_Mapper(const CSeq_align& map_align, size_t to_row, CScope* scope = 0); // Mapping from segments to the segmented sequence (same as // in annot iterator). If dst_id is set, all segments are // mapped to the id. Otherwise mapping is done to the top // level references in the map (e.g. if the map is created from // a seq-loc). CSeq_loc_Mapper(CBioseq_Handle target_seq); CSeq_loc_Mapper(const CSeqMap& seq_map, const CSeq_id* dst_id = 0, CScope* scope = 0); // Mapping from master sequence to its segments, restricted // by depth. Depth = 0 is for synonyms conversion. CSeq_loc_Mapper(size_t depth, CBioseq_Handle& source_seq); CSeq_loc_Mapper(size_t depth, const CSeqMap& source_seqmap, const CSeq_id* src_id = 0, CScope* scope = 0); ~CSeq_loc_Mapper(void); // Intervals' merging mode CSeq_loc_Mapper& SetMergeNone(void); CSeq_loc_Mapper& SetMergeAbutting(void); CSeq_loc_Mapper& SetMergeAll(void); CSeq_loc_Mapper& SetGapPreserve(void); CSeq_loc_Mapper& SetGapRemove(void); // Create target-to-target mapping to avoid truncation of ranges // already on the target sequence(s). void PreserveDestinationLocs(void); // Keep ranges which can not be mapped. Does not affect truncation // of partially mapped ranges. By default nonmapping ranges are // truncated. void KeepNonmappingRanges(void); void TruncateNonmappingRanges(void); CRef<CSeq_loc> Map(const CSeq_loc& src_loc); CRef<CSeq_align> Map(const CSeq_align& src_align); // Check if the last mapping resulted in partial location bool LastIsPartial(void);private: CSeq_loc_Mapper(const CSeq_loc_Mapper&); CSeq_loc_Mapper& operator=(const CSeq_loc_Mapper&); friend class CSeq_loc_Conversion_Set; friend class CSeq_align_Mapper; enum EMergeFlags { eMergeNone, // no merging eMergeAbutting, // merge only abutting intervals, keep overlapping eMergeAll // merge both abutting and overlapping intervals }; enum EGapFlags { eGapPreserve, // Leave gaps as-is eGapRemove // Remove gaps (NULLs) }; enum EWidthFlags { fWidthProtToNuc = 1, fWidthNucToProt = 2 }; typedef int TWidthFlags; // binary OR of "EWidthFlags" // Conversions typedef CRange<TSeqPos> TRange; typedef CRangeMultimap<CRef<CMappingRange>, TSeqPos> TRangeMap; typedef TRangeMap::iterator TRangeIterator; typedef map<CSeq_id_Handle, TRangeMap> TIdMap; // List and map of target ranges to construct target-to-target mapping typedef list<TRange> TDstRanges; typedef map<CSeq_id_Handle, TDstRanges> TDstIdMap; typedef vector<TDstIdMap> TDstStrandMap; // Destination locations arranged by ID/range typedef CRef<CInt_fuzz> TFuzz; typedef pair<TFuzz, TFuzz> TRangeFuzz; typedef pair<TRange, TRangeFuzz> TRangeWithFuzz; typedef list<TRangeWithFuzz> TMappedRanges; // 0 = not set, any other index = na_strand + 1 typedef vector<TMappedRanges> TRangesByStrand; typedef map<CSeq_id_Handle, TRangesByStrand> TRangesById; typedef map<CSeq_id_Handle, TWidthFlags> TWidthById; typedef CSeq_align::C_Segs::TDendiag TDendiag; typedef CSeq_align::C_Segs::TStd TStd; // Check molecule type, return character width (3=na, 1=aa, 0=unknown). int x_CheckSeqWidth(const CSeq_id& id, int width); int x_CheckSeqWidth(const CSeq_loc& loc, TSeqPos* total_length); // Get sequence length, try to get the real length for // reverse strand, do not use "whole". TSeqPos x_GetRangeLength(const CSeq_loc_CI& it); void x_AddConversion(const CSeq_id& src_id, TSeqPos src_start, ENa_strand src_strand,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -