📄 seqport_util.cpp
字号:
/* * =========================================================================== * PRODUCTION $Log: seqport_util.cpp,v $ * PRODUCTION Revision 1000.4 2004/06/01 19:33:29 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R6.24 * PRODUCTION * =========================================================================== */ /*$Id: seqport_util.cpp,v 1000.4 2004/06/01 19:33:29 gouriano Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Author: Clifford Clausen * (also reviewed/fixed/groomed by Denis Vakatov and Aaron Ucko) * * File Description: */ #include <ncbi_pch.hpp>#include <corelib/ncbimtx.hpp>#include <objects/seq/seqport_util.hpp>#include <serial/serial.hpp>#include <serial/objostr.hpp>#include <serial/objistr.hpp>#include <objects/seq/NCBI2na.hpp>#include <objects/seq/NCBI4na.hpp>#include <objects/seq/NCBI8na.hpp>#include <objects/seq/NCBI8aa.hpp>#include <objects/seq/IUPACna.hpp>#include <objects/seq/IUPACaa.hpp>#include <objects/seq/NCBIeaa.hpp>#include <objects/seq/NCBIstdaa.hpp>#include <objects/seq/NCBIpaa.hpp>#include <objects/seqcode/Seq_code_set.hpp>#include <objects/seqcode/Seq_code_table.hpp>#include <objects/seqcode/Seq_code_type.hpp>#include <objects/seqcode/Seq_map_table.hpp>#include <util/sequtil/sequtil.hpp>#include <util/sequtil/sequtil_convert.hpp>#include <util/sequtil/sequtil_manip.hpp>#include <algorithm>#include <string.h>BEGIN_NCBI_SCOPEBEGIN_objects_SCOPEstatic const bool kSymbol = true;static const bool kName = false;static const unsigned int kNumCodes = 11;static inline ESeq_code_type EChoiceToESeq (CSeq_data::E_Choice from_type){ switch (from_type) { case CSeq_data::e_Iupacaa: return eSeq_code_type_iupacaa; case CSeq_data::e_Ncbi2na: return eSeq_code_type_ncbi2na; case CSeq_data::e_Ncbi4na: return eSeq_code_type_ncbi4na; case CSeq_data::e_Iupacna: return eSeq_code_type_iupacna; case CSeq_data::e_Ncbieaa: return eSeq_code_type_ncbieaa; case CSeq_data::e_Ncbistdaa: return eSeq_code_type_ncbistdaa; case CSeq_data::e_Ncbi8na: return eSeq_code_type_ncbi8na; case CSeq_data::e_Ncbipna: return eSeq_code_type_ncbipna; case CSeq_data::e_Ncbi8aa: return eSeq_code_type_ncbi8aa; case CSeq_data::e_Ncbipaa: return eSeq_code_type_ncbipaa; default: throw CSeqportUtil::CBadType("EChoiceToESeq"); }} // CSeqportUtil_implementation is a singleton.class CSeqportUtil_implementation {public: CSeqportUtil_implementation(); ~CSeqportUtil_implementation(); typedef CSeqportUtil::TIndex TIndex; typedef CSeqportUtil::TPair TPair; TSeqPos Convert (const CSeq_data& in_seq, CSeq_data* out_seq, CSeq_data::E_Choice to_code, TSeqPos uBeginIdx, TSeqPos uLength, bool bAmbig, CRandom::TValue seed) const; TSeqPos Pack (CSeq_data* in_seq, TSeqPos uLength) const; bool FastValidate (const CSeq_data& in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; void Validate (const CSeq_data& in_seq, vector<TSeqPos>* badIdx, TSeqPos uBeginIdx, TSeqPos uLength) const; TSeqPos GetAmbigs (const CSeq_data& in_seq, CSeq_data* out_seq, vector<TSeqPos>* out_indices, CSeq_data::E_Choice to_code, TSeqPos uBeginIdx, TSeqPos uLength) const; TSeqPos GetCopy (const CSeq_data& in_seq, CSeq_data* out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; TSeqPos Keep (CSeq_data* in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; TSeqPos Append (CSeq_data* out_seq, const CSeq_data& in_seq1, TSeqPos uBeginIdx1, TSeqPos uLength1, const CSeq_data& in_seq2, TSeqPos uBeginIdx2, TSeqPos uLength2) const; TSeqPos Complement (CSeq_data* in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; TSeqPos Complement (const CSeq_data& in_seq, CSeq_data* out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; TSeqPos Reverse (CSeq_data* in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; TSeqPos Reverse (const CSeq_data& in_seq, CSeq_data* out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; TSeqPos ReverseComplement (CSeq_data* in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; TSeqPos ReverseComplement (const CSeq_data& in_seq, CSeq_data* out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; const string& GetIupacaa3(TIndex ncbistdaa); bool IsCodeAvailable(CSeq_data::E_Choice code_type); bool IsCodeAvailable(ESeq_code_type code_type); TPair GetCodeIndexFromTo(CSeq_data::E_Choice code_type); TPair GetCodeIndexFromTo(ESeq_code_type code_type); const string& GetCodeOrName(CSeq_data::E_Choice code_type, TIndex idx, bool get_code); const string& GetCodeOrName(ESeq_code_type code_type, TIndex idx, bool get_code); TIndex GetIndex(CSeq_data::E_Choice code_type, const string& code); TIndex GetIndex(ESeq_code_type code_type, const string& code); TIndex GetIndexComplement(CSeq_data::E_Choice code_type, TIndex idx); TIndex GetIndexComplement(ESeq_code_type code_type, TIndex idx); TIndex GetMapToIndex(CSeq_data::E_Choice from_type, CSeq_data::E_Choice to_type, TIndex from_idx); TIndex GetMapToIndex(ESeq_code_type from_type, ESeq_code_type to_type, TIndex from_idx);private: // Template wrapper class used to create data type specific // classes to delete code tables on exit from main template <class T> class CWrapper_table : public CObject { public: CWrapper_table(int size, int start) { m_Table = new T[256]; m_StartAt = start; m_Size = size; } ~CWrapper_table() { drop_table(); } void drop_table() { delete[] m_Table; m_Table = 0; } T* m_Table; int m_StartAt; int m_Size; }; // Template wrapper class used for two-dimensional arrays. template <class T> class CWrapper_2D : public CObject { public: CWrapper_2D(int size1, int start1, int size2, int start2) { m_Size_D1 = size1; m_Size_D2 = size2; m_StartAt_D1 = start1; m_StartAt_D2 = start2; m_Table = new T*[size1]; for(int i=0; i<size1; i++) { m_Table[i] = new T[size2] - start2; } m_Table -= start1; } ~CWrapper_2D() { m_Table += m_StartAt_D1; for(int i=0; i<m_Size_D1; i++) { delete[](m_Table[i] + m_StartAt_D2); } delete[] m_Table; } T** m_Table; int m_Size_D1; int m_Size_D2; int m_StartAt_D1; int m_StartAt_D2; }; // Typedefs making use of wrapper classes above. typedef CWrapper_table<char> CCode_table; typedef CWrapper_table<string> CCode_table_str; typedef CWrapper_table<int> CMap_table; typedef CWrapper_table<unsigned int> CFast_table4; typedef CWrapper_table<unsigned short> CFast_table2; typedef CWrapper_table<unsigned char> CAmbig_detect; typedef CWrapper_table<char> CCode_comp; typedef CWrapper_table<char> CCode_rev; typedef CWrapper_2D<unsigned char> CFast_4_1; typedef CWrapper_2D<unsigned char> CFast_2_1; // String to initialize CSeq_code_set // This string is initialized in seqport_util.h static const char* sm_StrAsnData[]; // CSeq_code_set member holding code and map table data CRef<CSeq_code_set> m_SeqCodeSet; // Helper function used internally to initialize m_SeqCodeSet CRef<CSeq_code_set> Init(); // Member variables holding code tables CRef<CCode_table> m_Iupacna; CRef<CCode_table> m_Ncbieaa; CRef<CCode_table> m_Ncbistdaa; CRef<CCode_table> m_Iupacaa; // Helper function to initialize code tables CRef<CCode_table> InitCodes(ESeq_code_type code_type); // Member variables holding na complement information CRef<CCode_comp> m_Iupacna_complement; CRef<CCode_comp> m_Ncbi2naComplement; CRef<CCode_comp> m_Ncbi4naComplement; // Helper functions to initialize complement tables CRef<CCode_comp> InitIupacnaComplement(); CRef<CCode_comp> InitNcbi2naComplement(); CRef<CCode_comp> InitNcbi4naComplement(); // Member variables holding na reverse information // Used to reverse residues packed within a byte. CRef<CCode_rev> m_Ncbi2naRev; CRef<CCode_rev> m_Ncbi4naRev; // Helper functions to initialize reverse tables CRef<CCode_rev> InitNcbi2naRev(); CRef<CCode_rev> InitNcbi4naRev(); // Member variables holding map tables CRef<CMap_table> m_Ncbi2naIupacna; CRef<CMap_table> m_Ncbi2naNcbi4na; CRef<CMap_table> m_Ncbi4naIupacna; CRef<CMap_table> m_IupacnaNcbi2na; CRef<CMap_table> m_IupacnaNcbi4na; CRef<CMap_table> m_Ncbi4naNcbi2na; CRef<CMap_table> m_IupacaaNcbieaa; CRef<CMap_table> m_NcbieaaIupacaa; CRef<CMap_table> m_IupacaaNcbistdaa; CRef<CMap_table> m_NcbieaaNcbistdaa; CRef<CMap_table> m_NcbistdaaNcbieaa; CRef<CMap_table> m_NcbistdaaIupacaa; TSeqPos x_ConvertAmbig (const CSeq_data& in_seq, CSeq_data* out_seq, CSeq_data::E_Choice to_code, TSeqPos uBeginIdx, TSeqPos uLength, CRandom::TValue seed) const; // Helper function to initialize map tables CRef<CMap_table> InitMaps(ESeq_code_type from_type, ESeq_code_type to_type); // Member variables holding fast conversion tables // Takes a byte as an index and returns a unsigned int with // 4 characters, each character being one of ATGC //CRef<CFast_table4> m_FastNcbi2naIupacna;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -