📄 signal_seq.cpp
字号:
/*
 * ===========================================================================
 * PRODUCTION $Log: signal_seq.cpp,v $
 * PRODUCTION Revision 1000.2  2004/06/01 18:10:56  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.7
 * PRODUCTION
 * ===========================================================================
 */

/*  $Id: signal_seq.cpp,v 1000.2 2004/06/01 18:10:56 gouriano Exp $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Authors:  Josh Cherry
 *
 * File Description:  Prediction of signal sequences from protein sequence
 *                    according to von Heijne, 1986 and 1987
 *
 */

#include <ncbi_pch.hpp>
#include <algo/sequence/signal_seq.hpp>

#include <stdexcept>

BEGIN_NCBI_SCOPE
USING_SCOPE(objects);

// Scoring matrices.  Each matrix has one row per residue code (26 rows,
// labelled with the residue letter at the end of the row) and one column
// per position of the 15-residue scoring window.
//
// we have to declare these extern to be accessible from template on ForteCC.
extern const double const_EukMat[26][15];
extern const double const_BacMat[26][15];

// Scoring matrix for eukaryotic signal sequences
const double const_EukMat[26][15] = {
    // Row 0 carries no label in the original source.
    // NOTE(review): presumably the gap code of the residue coding -- confirm.
    {0, 0, 0, 0, 0,
     0, 0, 0, 0, 0,
     0, 0, 0, 0, 0},
    {0.0984401, -0.109199, -0.0350913, 0.0339016, 0.321584,
     0.216223, 0.216223, 0.159065, 0.544727, 0.0339016,
     1.176, -0.882389, 1.70788, 0.216223, -0.882389},  // A
    {0, 0, 0, 0, 0,
     0, 0, 0, 0, 0,
     0, 0, 0, 0, 0},  // B (?)
    {-0.405465, 0.287682, 0.693147, 0.441833, 0.693147,
     1.13498, 0.287682, 0.575364, 0.105361, 0.287682,
     1.44036, -0.405465, 0.693147, 0.575364, -0.405465},  // C
    {-2.18605, -2.18605, -2.18605, -2.18605, -2.18605,
     -2.18605, -2.18605, -2.18605, -0.576613, -1.08744,
     -25.2119, -0.576613, -25.2119, 0.116534, 0.211844},  // D
    {-2.30259, -2.30259, -2.30259, -2.30259, -2.30259,
     -2.30259, -2.30259, -2.30259, -1.20397, -0.356675,
     -25.3284, -0.356675, -25.3284, 0.262364, 0.336472},  // E
    {0.842183, 0.474458, 0.675129, 0.675129, 0.0689929,
     0.223144, 1.16761, 0.842183, -0.336472, -0.113329,
     -24.7486, 0.842183, -24.7486, 0.0689929, -0.336472},  // F
    {-1.10691, -1.10691, -1.39459, -0.701446, -1.39459,
     0.0717439, -1.39459, -1.80006, 0.451234, 1.03316,
     -0.883768, -0.547295, 1.17036, -0.19062, -0.547295},  // G
    {-1.22378, -1.22378, -1.22378, -1.22378, -1.22378,
     -1.22378, -1.22378, -1.22378, 0.385662, -1.22378,
     -24.2496, 0.567984, -24.2496, 0.162519, -0.530628},  // H
    {0.70657, 0.70657, 0.0779615, -0.209721, 0.396415,
     -0.392042, -0.615186, 0.0779615, -0.392042, -2.00148,
     0.301105, -0.392042, -25.0273, 0.0779615, -0.0555699},  // I
    {-2.4248, -2.4248, -2.4248, -2.4248, -2.4248,
     -2.4248, -2.4248, -2.4248, -2.4248, -1.03851,
     -25.4507, -1.73166, -25.4507, -0.0269075, -0.227578},  // K
    {1.76947, 1.7263, 1.78346, 1.87624, 1.8635,
     1.31346, 1.66568, 1.39861, -0.19062, 0.642289,
     -0.413764, 0.502527, -2.49321, -0.413764, -1.10691},  // L
    {-0.993252, 0.105361, 0.952658, 0.393043, -0.993252,
     0.798508, -0.300105, -0.300105, -0.993252, -0.993252,
     -24.0191, -0.993252, -24.0191, -0.993252, -0.300105},  // M
    {-1.96009, -1.96009, -1.96009, -1.96009, -1.96009,
     -1.96009, -1.96009, -1.96009, -0.861482, -0.861482,
     -24.9859, 0.34249, -24.9859, -0.5738, -0.0141846},  // N
    {-1.30833, -2.00148, -1.30833, -2.00148, -2.00148,
     -0.615186, -2.00148, 0.0779615, 0.994252, 0.637577,
     -25.0273, -2.00148, -0.902868, -2.00148, 1.08956},  // P
    {-1.84055, -1.84055, -1.84055, -1.84055, -1.84055,
     -0.0487902, -1.84055, -1.84055, 0.462035, 0.238892,
     -24.8664, 1.04982, -0.741937, 1.10389, 0.462035},  // Q
    {-1.335, -2.02815, -2.02815, -2.02815, -2.02815,
     -2.02815, -2.02815, -2.02815, -0.0822381, -0.641854,
     -25.054, 0.679902, -25.054, 0.456758, 0.169076},  // R
    {-0.236389, -1.335, -0.354172, -0.641854, 0.131336,
     -0.131028, 0.274437, 0.338975, 0.824483, -0.0357181,
     0.701881, 0.3996, 0.562119, 0.274437, -0.131028},  // S
    {-1.57898, 0.0304592, -0.662688, -0.885832, -0.662688,
     0.292823, -0.326216, -0.326216, 0.212781, -0.480366,
     0.561087, -0.192684, -0.480366, -1.17351, 0.0304592},  // T
    {0.588787, 0.811931, 0.301105, 0.483427, 0.158004,
     0.301105, -0.00904984, 0.888892, -2.40695, 0.0779615,
     1.05879, -1.30833, -25.4328, -0.327504, 0.426268},  // V
    {0.798508, 0.510826, 0.510826, -0.587787, -0.587787,
     0.105361, 1.20397, 0.510826, -0.587787, 0.510826,
     -23.6136, 1.60944, -23.6136, 0.105361, -0.587787},  // W
    {0, 0, 0, 0, 0,
     0, 0, 0, 0, 0,
     0, 0, 0, 0, 0},  // X (?)
    {-1.72277, -1.72277, -0.336472, -1.72277, -1.72277,
     -1.72277, -0.624154, -1.72277, -1.72277, -1.02962,
     -24.7486, -0.113329, -24.7486, -1.72277, 0.223144},  // Y
    {0, 0, 0, 0, 0,
     0, 0, 0, 0, 0,
     0, 0, 0, 0, 0},  // Z (?)
    {0, 0, 0, 0, 0,
     0, 0, 0, 0, 0,
     0, 0, 0, 0, 0},  // U (?)
    {0, 0, 0, 0, 0,
     0, 0, 0, 0, 0,
     0, 0, 0, 0, 0}   // * (?)
};


// Scoring matrix for bacterial signal sequences
const double const_BacMat[26][15] = {
    // Row 0 carries no label in the original source.
    // NOTE(review): presumably the gap code of the residue coding -- confirm.
    {0, 0, 0, 0, 0,
     0, 0, 0, 0, 0,
     0, 0, 0, 0, 0},
    {1.13943, 0.916291, 0.916291, 1.03407, 0.628609,
     0.782759, 0.446287, 0.628609, 0.782759, 0.782759,
     2.0149, -0.470004, 2.27084, 1.72722, 0.223144},  // A
    {0, 0, 0, 0, 0,
     0, 0, 0, 0, 0,
     0, 0, 0, 0, 0},  // B (?)
    {0, 0, 0, 0, 0,
     0, 0, 0, 0, 0,
     -23.0259, 0, -23.0259, 0, 0},  // C
    {-0.693147, -0.693147, -0.693147, -0.693147, -0.693147,
     -0.693147, -0.693147, -0.693147, -0.693147, -0.693147,
     -23.719, -0.693147, -23.719, 0, 1.38629},  // D
    {-0.788457, -0.788457, -0.788457, -0.788457, -0.788457,
     -0.788457, -0.788457, -0.788457, -0.788457, -0.788457,
     -23.8143, -0.788457, -23.8143, 0.597837, 1.29098},  // E
    {0.430783, 1.12393, 0.836248, 1.12393, -0.262364,
     -0.262364, 1.81708, -0.262364, 1.12393, -0.262364,
     -23.2882, 1.68355, -23.2882, -0.262364, -0.262364},  // F
    {0.393043, -0.300105, -0.300105, -0.300105, 0.105361,
     0.616186, -0.300105, 0.393043, -0.300105, -0.300105,
     -24.0191, -0.300105, -0.300105, -0.993252, -0.993252},  // G
    {0.223144, 0.223144, 0.223144, 0.223144, 0.223144,
     0.223144, 0.223144, 0.223144, 0.223144, 0.223144,
     -22.8027, 2.16905, -22.8027, 0.223144, 0.223144},  // H
    {0.567984, -0.530628, 1.07881, -0.530628, 1.07881,
     -0.530628, -0.530628, 0.567984, -0.530628, -0.530628,
     -23.5565, -0.530628, -23.5565, -0.530628, 0.162519},  // I
    {-0.916291, -0.916291, -0.916291, -0.916291, -0.916291,
     -0.916291, -0.916291, -0.916291, -0.916291, -0.916291,
     -23.9421, -0.223144, -23.9421, 0.182322, -0.916291},  // K
    {1.08619, 1.40464, 1.20397, 1.08619, 1.20397,
     1.5717, -0.993252, -0.993252, -0.300105, -0.300105,
     -0.993252, -0.300105, -24.0191, -0.993252, -0.993252},  // L
    {0.510826, 1.20397, 0.510826, 0.510826, 1.60944,
     1.20397, 1.60944, 0.510826, 0.510826, 1.20397,
     -22.515, 1.89712, -22.515, 0.510826, 0.510826},  // M
    {-0.470004, -0.470004, -0.470004, -0.470004, -0.470004,
     -0.470004, -0.470004, -0.470004, -0.470004, -0.470004,
     -23.4959, 0.628609, -23.4959, -0.470004, 0.916291},  // N
    {-0.530628, -0.530628, -0.530628, -0.530628, -0.530628,
     -0.530628, 0.162519, 0.567984, 1.07881, 0.162519,
     -23.5565, -0.530628, -23.5565, -0.530628, 1.07881},  // P
    {-0.336472, -0.336472, -0.336472, -0.336472, -0.336472,
     -0.336472, -0.336472, -0.336472, 0.356675, 0.356675,
     -23.3623, 0.76214, -23.3623, -0.336472, -0.336472},  // Q
    {-0.530628, -0.530628, -0.530628, -0.530628, -0.530628,
     -0.530628, -0.530628, -0.530628, -0.530628, -0.530628,
     -23.5565, -0.530628, -23.5565, -0.530628, -0.530628},  // R
    {-0.955511, -0.955511, -0.955511, 0.430783, 0.430783,
     -0.955511, 0.653926, 1.75254, 0.653926, 1.12393,
     0.653926, -0.262364, -0.262364, -0.955511, -0.955511},  // S
    {-0.0953102, -0.788457, 0.597837, -0.0953102, -0.0953102,
     -0.0953102, -0.0953102, -0.0953102, 0.820981, -0.788457,
     0.310155, -0.788457, -0.788457, -0.788457, -0.0953102},  // T
    {0.693147, 1.02962, -0.916291, 0.182322, -0.916291,
     0.470004, 1.02962, -0.916291, -0.916291, 0.470004,
     0.182322, -0.916291, -23.9421, -0.223144, -0.916291},  // V
    {0.916291, 0.916291, 0.916291, 0.916291, 0.916291,
     0.916291, 0.916291, 0.916291, 0.916291, 0.916291,
     -22.1096, 0.916291, -22.1096, 0.916291, 0.916291},  // W
    {0, 0, 0, 0, 0,
     0, 0, 0, 0, 0,
     0, 0, 0, 0, 0},  // X (?)
    {-0.262364, -0.262364, -0.262364, -0.262364, -0.262364,
     -0.262364, -0.262364, -0.262364, -0.262364, 0.836248,
     -23.2882, -0.262364, -23.2882, -0.262364, -0.262364},  // Y
    {0, 0, 0, 0, 0,
     0, 0, 0, 0, 0,
     0, 0, 0, 0, 0},  // Z (?)
    {0, 0, 0, 0, 0,
     0, 0, 0, 0, 0,
     0, 0, 0, 0, 0},  // U (?)
    {0, 0, 0, 0, 0,
     0, 0, 0, 0, 0,
     0, 0, 0, 0, 0}   // * (?)
};


/// Slide a 15-residue window along seq, score each window against the
/// matrix selected by domain, and report the best-scoring window.
///
/// @param seq
///   Indexable sequence of residue codes; each element is used directly as
///   a row index into the 26-row scoring matrix.
///   NOTE(review): the CSeqVector overload below feeds this with data
///   obtained after SetNcbiCoding(); the string / vector<char> overloads
///   presumably expect the same coding -- confirm against the header.
/// @param domain
///   CSignalSeq::eBacterial selects const_BacMat; any other value selects
///   const_EukMat.
/// @param max_pos
///   Upper bound on the reported position; window starts are limited to
///   max_pos - 12.  Must be at least 12.
/// @param pos
///   Out: start of the best window plus 12 (the position before the cut).
/// @param score
///   Out: matrix score of the best window.
/// @throws std::runtime_error
///   If seq has fewer than 15 residues, if max_pos is below 12, or if a
///   residue code falls outside the matrix rows.
template<class Seq>
void x_PredictSignalSeq(const Seq& seq, CSignalSeq::EDomain domain,
                        TSeqPos max_pos, TSeqPos& pos, double& score)
{
    const TSeqPos      kWindowSize = 15;  // columns in the scoring matrices
    const TSeqPos      kCutOffset  = 12;  // window start -> position before cut
    const unsigned int kNumRows    = 26;  // rows in the scoring matrices

    // Both subtractions below are performed on unsigned values; without
    // these guards a short sequence or small max_pos would wrap around and
    // make the scan run far past the end of seq.
    if (seq.size() < kWindowSize) {
        throw std::runtime_error("Sequence length must be at least 15 "
                                 "to predict signal sequence");
    }
    if (max_pos < kCutOffset) {
        throw std::runtime_error("max_pos must be at least 12 "
                                 "to predict signal sequence");
    }

    const double (*Mat)[15];
    if (domain == CSignalSeq::eBacterial) {
        Mat = const_BacMat;
    } else {
        Mat = const_EukMat;
    }

    // Last admissible window start: the window must fit inside seq and the
    // reported position (start + 12) must not exceed max_pos.
    const TSeqPos last_by_length = static_cast<TSeqPos>(seq.size()) - kWindowSize;
    const TSeqPos last_by_limit  = max_pos - kCutOffset;
    const TSeqPos max_index      = min(last_by_length, last_by_limit);

    double  max_score = -1e6;
    TSeqPos max_loc   = 0;  // always defined, even if no window beats -1e6

    for (TSeqPos i = 0;  i <= max_index;  ++i) {
        double sum = 0;
        for (TSeqPos j = 0;  j < kWindowSize;  ++j) {
            // Go through unsigned char so a negative char cannot become a
            // negative row index, then make sure the row exists.
            const unsigned int code = static_cast<unsigned char>(seq[i + j]);
            if (code >= kNumRows) {
                throw std::runtime_error("Residue code out of range "
                                         "for signal sequence scoring matrix");
            }
            sum += Mat[code][j];
        }
        if (sum > max_score) {
            max_score = sum;
            max_loc   = i;
        }
    }

    score = max_score;
    pos   = max_loc + kCutOffset;  // position before cut
}


// Overload for a sequence held in a string; forwards to x_PredictSignalSeq.
void CSignalSeq::Predict(const string& seq, EDomain domain,
                         TSeqPos max_pos, TSeqPos& pos, double& score)
{
    x_PredictSignalSeq(seq, domain, max_pos, pos, score);
}


// Overload for a sequence held in a vector<char>; forwards to
// x_PredictSignalSeq.
void CSignalSeq::Predict(const vector<char>& seq, EDomain domain,
                         TSeqPos max_pos, TSeqPos& pos, double& score)
{
    x_PredictSignalSeq(seq, domain, max_pos, pos, score);
}


// Overload for a CSeqVector: works on a copy switched to NCBI coding,
// extracts the whole sequence into a string and scores that string.
void CSignalSeq::Predict(const CSeqVector& seq, EDomain domain,
                         TSeqPos max_pos, TSeqPos& pos, double& score)
{
    string seq_ncbistdaa;
    CSeqVector vec(seq);  // copy, so the caller's coding is left untouched
    vec.SetNcbiCoding();
    vec.GetSeqData(0, vec.size(), seq_ncbistdaa);
    x_PredictSignalSeq(seq_ncbistdaa, domain, max_pos, pos, score);
}


END_NCBI_SCOPE

/*
 * ===========================================================================
 * $Log: signal_seq.cpp,v $
 * Revision 1000.2  2004/06/01 18:10:56  gouriano
 * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.7
 *
 * Revision 1.7  2004/05/21 21:41:04  gorelenk
 * Added PCH ncbi_pch.hpp
 *
 * Revision 1.6  2004/03/16 19:40:09  vasilche
 * Made static const arrays accessible from template on ForteCC
 *
 * Revision 1.5  2004/03/15 12:30:19  dicuccio
 * Changed name of const arrays
 *
 * Revision 1.4  2004/03/12 19:59:31  dicuccio
 * Dropped static on private arrays as WorkShop doesn't let templates use such
 * arrays
 *
 * Revision 1.3  2004/03/11 17:27:16  dicuccio
 * Changed static member arrays to private static arrays
 *
 * Revision 1.2  2003/09/10 17:55:04  ucko
 * Add a cast to fix 64-bit compilation.
 *
 * Revision 1.1  2003/09/10 15:31:34  jcherry
 * Initial version
 *
 * ===========================================================================
 */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -