📄 genetic_code_table.cpp
字号:
/* * =========================================================================== * PRODUCTION $Log: Genetic_code_table.cpp,v $ * PRODUCTION Revision 1000.2 2004/06/01 19:33:54 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R6.17 * PRODUCTION * =========================================================================== *//* $Id: Genetic_code_table.cpp,v 1000.2 2004/06/01 19:33:54 gouriano Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Author: ....... * * File Description: * ....... * * Remark: * This code was originally generated by application DATATOOL * using specifications from the ASN data definition file * 'seqfeat.asn'. */// standard includes#include <ncbi_pch.hpp>#include <serial/serial.hpp>#include <serial/objistr.hpp>#include <corelib/ncbithr.hpp>// generated includes#include <objects/seqfeat/Genetic_code_table.hpp>// generated classesBEGIN_NCBI_SCOPEBEGIN_objects_SCOPE // namespace ncbi::objects::// destructorCGenetic_code_table::~CGenetic_code_table(void){}// genetic code translation tables// destructorCTrans_table::~CTrans_table(void){}// translation finite state machine base codes - ncbi4naenum EBaseCode { eBase_gap = 0, eBase_A, /* A */ eBase_C, /* C */ eBase_M, /* AC */ eBase_G, /* G */ eBase_R, /* AG */ eBase_S, /* CG */ eBase_V, /* ACG */ eBase_T, /* T */ eBase_W, /* AT */ eBase_Y, /* CT */ eBase_H, /* ACT */ eBase_K, /* GT */ eBase_D, /* AGT */ eBase_B, /* CGT */ eBase_N /* ACGT */};// static instances of single copy translation tables common to all genetic codesint CTrans_table::sm_NextState [4097];int CTrans_table::sm_RvCmpState [4097];int CTrans_table::sm_BaseToIdx [256];// initialize base conversion, next state, and reverse complement state tablesvoid CTrans_table::x_InitFsaTable (void){ char ch; int i, j, k, p, q, r, nx, st; static char charToBase [17] = "-ACMGRSVTWYHKDBN"; static char baseToComp [17] = "-TGKCYSBAWRDMHVN"; // illegal characters map to 0 for (i = 0; i < 256; i++) { sm_BaseToIdx [i] = 0; } // map iupacna alphabet to EBaseCode for (i = eBase_gap; i <= eBase_N; i++) { ch = charToBase [i]; sm_BaseToIdx [(int) ch] = i; ch = tolower (ch); sm_BaseToIdx [(int) ch] = i; } sm_BaseToIdx [(int) 'U'] = eBase_T; sm_BaseToIdx [(int) 'u'] = eBase_T; sm_BaseToIdx [(int) 'X'] = eBase_N; sm_BaseToIdx [(int) 'x'] = eBase_N; // also map ncbi4na alphabet to EBaseCode for (i = eBase_gap; i <= eBase_N; i++) { sm_BaseToIdx [(int) i] = i; } // treat state 0 as already having seen NN, // avoiding single and double letter states sm_NextState [0] = 4081; sm_RvCmpState [0] = 4096; // states 1 through 4096 are triple letter states (---, --A, ..., NNT, NNN) for (i = eBase_gap, st = 1; i <= eBase_N; i++) { for (j = eBase_gap, nx = 1; j <= eBase_N; j++) { for (k = eBase_gap; k <= eBase_N; k++, st++, nx += 16) { sm_NextState [st] = nx; p = sm_BaseToIdx [(int) (Uint1) baseToComp [k]]; q = sm_BaseToIdx [(int) (Uint1) baseToComp [j]]; r = sm_BaseToIdx [(int) (Uint1) baseToComp [i]]; sm_RvCmpState [st] = 256 * p + 16 * q + r + 1; } } }}// initialize genetic code specific translation tablesvoid CTrans_table::x_InitFsaTransl (const string *ncbieaa, const string *sncbieaa) const{ char ch, aa, orf; bool go_on; int i, j, k, p, q, r, x, y, z, st, cd; static int expansions [4] = {eBase_A, eBase_C, eBase_G, eBase_T}; // T = 0, C = 1, A = 2, G = 3 static int codonIdx [9] = {0, 2, 1, 0, 3, 0, 0, 0, 0}; // return if unable to find ncbieaa and sncbieaa strings if (ncbieaa == 0 || sncbieaa == 0) return; // also check length of ncbieaa and sncbieaa strings if (ncbieaa->size () != 64 || sncbieaa->size () != 64) return; // ambiguous codons map to unknown amino acid or not start for (i = 0; i <= 4096; i++) { m_AminoAcid [i] = 'X'; m_OrfStart [i] = '-'; } // lookup amino acid for each codon in genetic code table for (i = eBase_gap, st = 1; i <= eBase_N; i++) { for (j = eBase_gap; j <= eBase_N; j++) { for (k = eBase_gap; k <= eBase_N; k++, st++) { aa = '\0'; orf = '\0'; go_on = true; // expand ambiguous IJK nucleotide symbols into component bases XYZ for (p = 0; p < 4 && go_on; p++) { x = expansions [p]; if ((x & i) != 0) { for (q = 0; q < 4 && go_on; q++) { y = expansions [q]; if ((y & j) != 0) { for (r = 0; r < 4 && go_on; r++) { z = expansions [r]; if ((z & k) != 0) { // calculate offset in genetic code string // the T = 0, C = 1, A = 2, G = 3 order is // necessary because the genetic code strings // are presented in TCAG order in printed tables // and in the genetic code strings cd = 16 * codonIdx [x] + 4 * codonIdx [y] + codonIdx [z]; // lookup amino acid for codon XYZ ch = (*ncbieaa) [cd]; if (aa == '\0') { aa = ch; } else if (aa != ch) { // allow Asx (Asp or Asn) and Glx (Glu or Gln) if ((aa == 'B' || aa == 'D' || aa == 'N') && (ch == 'D' || ch == 'N')) { aa = 'B'; } else if ((aa == 'Z' || aa == 'E' || aa == 'Q') && (ch == 'E' || ch == 'Q')) { aa = 'Z'; } else { aa = 'X'; } } // lookup translation start flag ch = (*sncbieaa) [cd]; if (orf == '\0') { orf = ch; } else if (orf != ch) { orf = 'X'; } // drop out of loop as soon as answer is known if (aa == 'X' && orf == 'X') { go_on = false; } } } } } } } // assign amino acid and orf start if (aa != '\0') { m_AminoAcid [st] = aa; } if (orf != '\0') { m_OrfStart [st] = orf; } } } }}// implementation class for genetic code table and translation tablesclass CGen_code_table_imp : public CObject{public: // constructor CGen_code_table_imp(void); // destructor ~CGen_code_table_imp(void); // return initialized translation table given genetic code const CTrans_table& GetTransTable (int gc); const CTrans_table& GetTransTable (const CGenetic_code& gc); // return single copy loaded genetic code table for iteration const CGenetic_code_table& GetCodeTable (void); const string& GetNcbieaa(int id) const; const string& GetNcbieaa(const CGenetic_code& gc) const; const string& GetSncbieaa(int id) const; const string& GetSncbieaa(const CGenetic_code& gc) const;private: // genetic code table data CRef <CGenetic_code_table> m_GcTable; // typedefs typedef vector< CRef< CTrans_table > > TTransTablesById; // translation tables TTransTablesById m_TransTablesById; // local copy of genetic code table ASN.1 static const char * sm_GenCodeTblMemStr [];};// single instance of implementation class is initialized before Mainauto_ptr<CGen_code_table_imp> CGen_code_table::sm_Implementation;void CGen_code_table::x_InitImplementation(){ DEFINE_STATIC_FAST_MUTEX(s_Implementation_mutex); CFastMutexGuard LOCK(s_Implementation_mutex); if ( !sm_Implementation.get() ) { sm_Implementation.reset(new CGen_code_table_imp()); }}// public access functionsconst CTrans_table& CGen_code_table::GetTransTable (int id){ return x_GetImplementation().GetTransTable (id);}const CTrans_table& CGen_code_table::GetTransTable(const CGenetic_code& gc){ return x_GetImplementation().GetTransTable(gc);}const CGenetic_code_table& CGen_code_table::GetCodeTable(void){ return x_GetImplementation().GetCodeTable();}const string& CGen_code_table::GetNcbieaa(int id){ return x_GetImplementation().GetNcbieaa(id);}const string& CGen_code_table::GetNcbieaa(const CGenetic_code& gc){ return x_GetImplementation().GetNcbieaa(gc);}const string& CGen_code_table::GetSncbieaa(int id) { return x_GetImplementation().GetSncbieaa(id);}const string& CGen_code_table::GetSncbieaa(const CGenetic_code& gc){ return x_GetImplementation().GetSncbieaa(gc);}string CGen_code_table::IndexToCodon(int index){ if ( index < 0 || index > 63 ) return CNcbiEmptyString::Get(); static char na[4] = { 'T', 'C', 'A', 'G' }; string codon; codon.resize(3); int total = index; int div = 16; for ( int i = 0; i < 3; ++i ) { int j = total / div; codon[i] = na[j]; total -= div * j; div /= 4; } return codon;}int CGen_code_table::CodonToIndex(char base1, char base2, char base3){ string codon; codon.insert(codon.end(), base1); codon.insert(codon.end(), base2); codon.insert(codon.end(), base3); return CodonToIndex(codon);}static bool s_ValidCodon(const string& codon) { if ( codon.length() != 3 ) return false; for ( int i = 0; i < 3; ++i ) { char ch = toupper(codon[i]); if ( ch != 'A' && ch != 'G' && ch != 'C' && ch != 'T' && ch != 'U' ) { return false; } } return true;}int CGen_code_table::CodonToIndex(const string& codon){ if ( !s_ValidCodon(codon) ) return -1;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -