📄 seqport_util.cpp
字号:
// Takes a byte (each byte with 4 Ncbi2na codes) as an index and // returns a Unit2 with 2 bytes, each byte formatted as 2 Ncbi4na codes //CRef<CFast_table2> m_FastNcbi2naNcbi4na; // Takes a byte (each byte with 2 Ncbi4na codes) as an index and // returns a 2 byte string, each byte with an Iupacna code. //CRef<CFast_table2> m_FastNcbi4naIupacna; // Table used for fast compression from Iupacna to Ncbi2na (4 bytes to 1 // byte). This table is a 2 dimensional table. The first dimension // corresponds to the iupacna position modulo 4 (0-3). The second dimension // is the value of the iupacna byte (0-255). The 4 resulting values from 4 // iupacna bytes are bitwise or'd to produce 1 byte. CRef<CFast_4_1> m_FastIupacnaNcbi2na; // Table used for fast compression from Iupacna to Ncbi4na // (2 bytes to 1 byte). Similar to m_FastIupacnaNcbi2na CRef<CFast_2_1> m_FastIupacnaNcbi4na; // Table used for fast compression from Ncbi4na to Ncbi2na // (2 bytes to 1 byte). Similar to m_FastIupacnaNcbi4na CRef<CFast_2_1> m_FastNcbi4naNcbi2na; // Tables used to convert an index for a code type to a symbol or name // for the same code type vector<vector<string> > m_IndexString[2]; vector<vector<TIndex> > m_IndexComplement; vector<map<string, TIndex> > m_StringIndex; vector<TIndex> m_StartAt; // Helper functions to initialize fast conversion tables //CRef<CFast_table4> InitFastNcbi2naIupacna(); CRef<CFast_table2> InitFastNcbi2naNcbi4na(); CRef<CFast_table2> InitFastNcbi4naIupacna(); CRef<CFast_4_1> InitFastIupacnaNcbi2na(); CRef<CFast_2_1> InitFastIupacnaNcbi4na(); CRef<CFast_2_1> InitFastNcbi4naNcbi2na(); // Helper functions to initialize Index to/from code/name conversion tables // and complement tables void InitIndexCodeName(); // Data members and functions used for random disambiguation // structure used for ncbi4na --> ncbi2na struct SMasksArray : public CObject { // Structure to hold all masks applicable to an input byte struct SMasks { int nMasks; unsigned char cMask[16]; }; SMasks 
m_Table[256]; }; CRef<SMasksArray> m_Masks; // Helper function to initialize m_Masks CRef<SMasksArray> InitMasks(); // Data members used for detecting ambiguities // Data members used by GetAmbig methods to get a list of // ambiguities resulting from alphabet conversions CRef<CAmbig_detect> m_DetectAmbigNcbi4naNcbi2na; CRef<CAmbig_detect> m_DetectAmbigIupacnaNcbi2na; // Helper functions to initialize the m_DetectAmbig* data members CRef<CAmbig_detect> InitAmbigNcbi4naNcbi2na(); CRef<CAmbig_detect> InitAmbigIupacnaNcbi2na(); // Alphabet conversion functions. Functions return // the number of converted codes. /* // Function to convert ncbi2na (1 byte) to iupacna (4 bytes) TSeqPos MapNcbi2naToIupacna(const CSeq_data& in_seq, CSeq_data* out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; // Function to convert ncbi2na (1 byte) to ncbi4na (2 bytes) TSeqPos MapNcbi2naToNcbi4na(const CSeq_data& in_seq, CSeq_data* out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; // Function to convert ncbi4na (1 byte) to iupacna (2 bytes) TSeqPos MapNcbi4naToIupacna(const CSeq_data& in_seq, CSeq_data* out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; */ // Function to convert iupacna (4 bytes) to ncbi2na (1 byte) TSeqPos MapIupacnaToNcbi2na(const CSeq_data& in_seq, CSeq_data* out_seq, TSeqPos uBeginIdx, TSeqPos uLength, bool bAmbig, CRandom::TValue seed) const; /* // Function to convert iupacna (2 bytes) to ncbi4na (1 byte) TSeqPos MapIupacnaToNcbi4na(const CSeq_data& in_seq, CSeq_data* out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; */ // Function to convert ncbi4na (2 bytes) to ncbi2na (1 byte) TSeqPos MapNcbi4naToNcbi2na(const CSeq_data& in_seq, CSeq_data* out_seq, TSeqPos uBeginIdx, TSeqPos uLength, bool bAmbig, CRandom::TValue seed) const; /* // Function to convert iupacaa (byte) to ncbieaa (byte) TSeqPos MapIupacaaToNcbieaa(const CSeq_data& in_seq, CSeq_data* out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; // Function to convert ncbieaa (byte) to iupacaa (byte) TSeqPos 
MapNcbieaaToIupacaa(const CSeq_data& in_seq, CSeq_data* out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; // Function to convert iupacaa (byte) to ncbistdaa (byte) TSeqPos MapIupacaaToNcbistdaa(const CSeq_data& in_seq, CSeq_data* out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; // Function to convert ncbieaa (byte) to ncbistdaa (byte) TSeqPos MapNcbieaaToNcbistdaa(const CSeq_data& in_seq, CSeq_data* out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; // Function to convert ncbistdaa (byte) to ncbieaa (byte) TSeqPos MapNcbistdaaToNcbieaa(const CSeq_data& in_seq, CSeq_data* out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; // Function to convert ncbistdaa (byte) to iupacaa (byte) TSeqPos MapNcbistdaaToIupacaa(const CSeq_data& in_seq, CSeq_data* out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; */ // Fast Validation functions bool FastValidateIupacna(const CSeq_data& in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; bool FastValidateNcbieaa(const CSeq_data& in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; bool FastValidateNcbistdaa(const CSeq_data& in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; bool FastValidateIupacaa(const CSeq_data& in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; // Full Validation functions void ValidateIupacna(const CSeq_data& in_seq, vector<TSeqPos>* badIdx, TSeqPos uBeginIdx, TSeqPos uLength) const; void ValidateNcbieaa(const CSeq_data& in_seq, vector<TSeqPos>* badIdx, TSeqPos uBeginIdx, TSeqPos uLength) const; void ValidateNcbistdaa(const CSeq_data& in_seq, vector<TSeqPos>* badIdx, TSeqPos uBeginIdx, TSeqPos uLength) const; void ValidateIupacaa(const CSeq_data& in_seq, vector<TSeqPos>* badIdx, TSeqPos uBeginIdx, TSeqPos uLength) const; // Functions to make copies of the different types of sequences TSeqPos GetNcbi2naCopy(const CSeq_data& in_seq, CSeq_data* out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; TSeqPos GetNcbi4naCopy(const CSeq_data& in_seq, CSeq_data* out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; TSeqPos 
GetIupacnaCopy(const CSeq_data& in_seq, CSeq_data* out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; TSeqPos GetNcbieaaCopy(const CSeq_data& in_seq, CSeq_data* out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; TSeqPos GetNcbistdaaCopy(const CSeq_data& in_seq, CSeq_data* out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; TSeqPos GetIupacaaCopy(const CSeq_data& in_seq, CSeq_data* out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; // Function to adjust uBeginIdx to lie on an in_seq byte boundary // and uLength to lie on an out_seq byte boundary. Returns // overhang, the number of out seqs beyond byte boundary determined // by uBeginIdx + uLength TSeqPos Adjust(TSeqPos* uBeginIdx, TSeqPos* uLength, TSeqPos uInSeqBytes, TSeqPos uInSeqsPerByte, TSeqPos uOutSeqsPerByte) const; // GetAmbig methods // Loops through an ncbi4na input sequence and determines // the ambiguities that would result from conversion to an ncbi2na sequence. // On return, out_seq contains the ncbi4na bases that become ambiguous and // out_indices contains the indices of the ambiguous bases in in_seq TSeqPos GetAmbigs_ncbi4na_ncbi2na(const CSeq_data& in_seq, CSeq_data* out_seq, vector<TSeqPos>* out_indices, TSeqPos uBeginIdx, TSeqPos uLength) const; // Loops through an iupacna input sequence and determines // the ambiguities that would result from conversion to an ncbi2na sequence. // On return, out_seq contains the iupacna bases that become ambiguous and // out_indices contains the indices of the ambiguous bases in in_seq. The // return is the number of ambiguities found. TSeqPos GetAmbigs_iupacna_ncbi2na(const CSeq_data& in_seq, CSeq_data* out_seq, vector<TSeqPos>* out_indices, TSeqPos uBeginIdx, TSeqPos uLength) const; // Methods to perform Keep on specific seq types. Methods // return length of kept sequence. 
TSeqPos KeepNcbi2na(CSeq_data* in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; TSeqPos KeepNcbi4na(CSeq_data* in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; TSeqPos KeepIupacna(CSeq_data* in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; TSeqPos KeepNcbieaa(CSeq_data* in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; TSeqPos KeepNcbistdaa(CSeq_data* in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; TSeqPos KeepIupacaa(CSeq_data* in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; // Methods to complement na sequences // In place methods. Return number of complemented residues. TSeqPos ComplementIupacna(CSeq_data* in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; TSeqPos ComplementNcbi2na(CSeq_data* in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; TSeqPos ComplementNcbi4na(CSeq_data* in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; // Complement in copy methods TSeqPos ComplementIupacna(const CSeq_data& in_seq, CSeq_data* out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; TSeqPos ComplementNcbi2na(const CSeq_data& in_seq, CSeq_data* out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; TSeqPos ComplementNcbi4na(const CSeq_data& in_seq, CSeq_data* out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; // Methods to reverse na sequences // In place methods TSeqPos ReverseIupacna(CSeq_data* in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; TSeqPos ReverseNcbi2na(CSeq_data* in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; TSeqPos ReverseNcbi4na(CSeq_data* in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; // Reverse in copy methods TSeqPos ReverseIupacna(const CSeq_data& in_seq, CSeq_data* out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; TSeqPos ReverseNcbi2na(const CSeq_data& in_seq, CSeq_data* out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; TSeqPos ReverseNcbi4na(const CSeq_data& in_seq, CSeq_data* out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const; // Methods to reverse-complement na sequences // In place methods TSeqPos 
ReverseComplementIupacna(CSeq_data* in_seq, TSeqPos uBeginIdx,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -