⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 signal_seq.cpp

📁 ncbi源码
💻 CPP
字号:
/*
 * ===========================================================================
 * PRODUCTION $Log: signal_seq.cpp,v $
 * PRODUCTION Revision 1000.2  2004/06/01 18:10:56  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.7
 * PRODUCTION
 * ===========================================================================
 */

/*  $Id: signal_seq.cpp,v 1000.2 2004/06/01 18:10:56 gouriano Exp $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Authors:  Josh Cherry
 *
 * File Description:  Prediction of signal sequences from protein sequence
 *                    according to von Heijne, 1986 and 1987
 *
 */

#include <ncbi_pch.hpp>
#include <algo/sequence/signal_seq.hpp>

BEGIN_NCBI_SCOPE
USING_SCOPE(objects);

// Scoring matrices.
//
// Both tables are indexed as Mat[residue code][offset within the window]:
// 26 residue rows by 15 window columns.  The per-row letter comments are the
// original author's; rows marked "(?)" are all zero.
// NOTE(review): the row order looks like the NCBIstdaa alphabet (row 0 being
// the gap code) -- confirm against the encoding table.

// Scoring matrix for eukaryotic signal sequences
// we have to declare these extern to be accessible from template on ForteCC.
extern const double const_EukMat[26][15];
extern const double const_BacMat[26][15];

const double const_EukMat[26][15] = {
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
    {0.0984401, -0.109199, -0.0350913, 0.0339016, 0.321584,
     0.216223, 0.216223, 0.159065, 0.544727, 0.0339016,
     1.176, -0.882389, 1.70788, 0.216223, -0.882389}, // A
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // B (?)
    {-0.405465, 0.287682, 0.693147, 0.441833, 0.693147,
     1.13498, 0.287682, 0.575364, 0.105361, 0.287682,
     1.44036, -0.405465, 0.693147, 0.575364, -0.405465}, // C
    {-2.18605, -2.18605, -2.18605, -2.18605, -2.18605,
     -2.18605, -2.18605, -2.18605, -0.576613, -1.08744,
     -25.2119, -0.576613, -25.2119, 0.116534, 0.211844}, // D
    {-2.30259, -2.30259, -2.30259, -2.30259, -2.30259,
     -2.30259, -2.30259, -2.30259, -1.20397, -0.356675,
     -25.3284, -0.356675, -25.3284, 0.262364, 0.336472}, // E
    {0.842183, 0.474458, 0.675129, 0.675129, 0.0689929,
     0.223144, 1.16761, 0.842183, -0.336472, -0.113329,
     -24.7486, 0.842183, -24.7486, 0.0689929, -0.336472}, // F
    {-1.10691, -1.10691, -1.39459, -0.701446, -1.39459,
     0.0717439, -1.39459, -1.80006, 0.451234, 1.03316,
     -0.883768, -0.547295, 1.17036, -0.19062, -0.547295}, // G
    {-1.22378, -1.22378, -1.22378, -1.22378, -1.22378,
     -1.22378, -1.22378, -1.22378, 0.385662, -1.22378,
     -24.2496, 0.567984, -24.2496, 0.162519, -0.530628}, // H
    {0.70657, 0.70657, 0.0779615, -0.209721, 0.396415,
     -0.392042, -0.615186, 0.0779615, -0.392042, -2.00148,
     0.301105, -0.392042, -25.0273, 0.0779615, -0.0555699}, // I
    {-2.4248, -2.4248, -2.4248, -2.4248, -2.4248,
     -2.4248, -2.4248, -2.4248, -2.4248, -1.03851,
     -25.4507, -1.73166, -25.4507, -0.0269075, -0.227578}, // K
    {1.76947, 1.7263, 1.78346, 1.87624, 1.8635,
     1.31346, 1.66568, 1.39861, -0.19062, 0.642289,
     -0.413764, 0.502527, -2.49321, -0.413764, -1.10691}, // L
    {-0.993252, 0.105361, 0.952658, 0.393043, -0.993252,
     0.798508, -0.300105, -0.300105, -0.993252, -0.993252,
     -24.0191, -0.993252, -24.0191, -0.993252, -0.300105}, // M
    {-1.96009, -1.96009, -1.96009, -1.96009, -1.96009,
     -1.96009, -1.96009, -1.96009, -0.861482, -0.861482,
     -24.9859, 0.34249, -24.9859, -0.5738, -0.0141846}, // N
    {-1.30833, -2.00148, -1.30833, -2.00148, -2.00148,
     -0.615186, -2.00148, 0.0779615, 0.994252, 0.637577,
     -25.0273, -2.00148, -0.902868, -2.00148, 1.08956}, // P
    {-1.84055, -1.84055, -1.84055, -1.84055, -1.84055,
     -0.0487902, -1.84055, -1.84055, 0.462035, 0.238892,
     -24.8664, 1.04982, -0.741937, 1.10389, 0.462035}, // Q
    {-1.335, -2.02815, -2.02815, -2.02815, -2.02815,
     -2.02815, -2.02815, -2.02815, -0.0822381, -0.641854,
     -25.054, 0.679902, -25.054, 0.456758, 0.169076}, // R
    {-0.236389, -1.335, -0.354172, -0.641854, 0.131336,
     -0.131028, 0.274437, 0.338975, 0.824483, -0.0357181,
     0.701881, 0.3996, 0.562119, 0.274437, -0.131028}, // S
    {-1.57898, 0.0304592, -0.662688, -0.885832, -0.662688,
     0.292823, -0.326216, -0.326216, 0.212781, -0.480366,
     0.561087, -0.192684, -0.480366, -1.17351, 0.0304592}, // T
    {0.588787, 0.811931, 0.301105, 0.483427, 0.158004,
     0.301105, -0.00904984, 0.888892, -2.40695, 0.0779615,
     1.05879, -1.30833, -25.4328, -0.327504, 0.426268}, // V
    {0.798508, 0.510826, 0.510826, -0.587787, -0.587787,
     0.105361, 1.20397, 0.510826, -0.587787, 0.510826,
     -23.6136, 1.60944, -23.6136, 0.105361, -0.587787}, // W
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // X (?)
    {-1.72277, -1.72277, -0.336472, -1.72277, -1.72277,
     -1.72277, -0.624154, -1.72277, -1.72277, -1.02962,
     -24.7486, -0.113329, -24.7486, -1.72277, 0.223144}, // Y
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Z (?)
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // U (?)
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} // * (?)
};


// Scoring matrix for bacterial signal sequences
const double const_BacMat[26][15] = {
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
    {1.13943, 0.916291, 0.916291, 1.03407, 0.628609,
     0.782759, 0.446287, 0.628609, 0.782759, 0.782759,
     2.0149, -0.470004, 2.27084, 1.72722, 0.223144}, // A
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // B (?)
    {0, 0, 0, 0, 0,
     0, 0, 0, 0, 0,
     -23.0259, 0, -23.0259, 0, 0}, // C
    {-0.693147, -0.693147, -0.693147, -0.693147, -0.693147,
     -0.693147, -0.693147, -0.693147, -0.693147, -0.693147,
     -23.719, -0.693147, -23.719, 0, 1.38629}, // D
    {-0.788457, -0.788457, -0.788457, -0.788457, -0.788457,
     -0.788457, -0.788457, -0.788457, -0.788457, -0.788457,
     -23.8143, -0.788457, -23.8143, 0.597837, 1.29098}, // E
    {0.430783, 1.12393, 0.836248, 1.12393, -0.262364,
     -0.262364, 1.81708, -0.262364, 1.12393, -0.262364,
     -23.2882, 1.68355, -23.2882, -0.262364, -0.262364}, // F
    {0.393043, -0.300105, -0.300105, -0.300105, 0.105361,
     0.616186, -0.300105, 0.393043, -0.300105, -0.300105,
     -24.0191, -0.300105, -0.300105, -0.993252, -0.993252}, // G
    {0.223144, 0.223144, 0.223144, 0.223144, 0.223144,
     0.223144, 0.223144, 0.223144, 0.223144, 0.223144,
     -22.8027, 2.16905, -22.8027, 0.223144, 0.223144}, // H
    {0.567984, -0.530628, 1.07881, -0.530628, 1.07881,
     -0.530628, -0.530628, 0.567984, -0.530628, -0.530628,
     -23.5565, -0.530628, -23.5565, -0.530628, 0.162519}, // I
    {-0.916291, -0.916291, -0.916291, -0.916291, -0.916291,
     -0.916291, -0.916291, -0.916291, -0.916291, -0.916291,
     -23.9421, -0.223144, -23.9421, 0.182322, -0.916291}, // K
    {1.08619, 1.40464, 1.20397, 1.08619, 1.20397,
     1.5717, -0.993252, -0.993252, -0.300105, -0.300105,
     -0.993252, -0.300105, -24.0191, -0.993252, -0.993252}, // L
    {0.510826, 1.20397, 0.510826, 0.510826, 1.60944,
     1.20397, 1.60944, 0.510826, 0.510826, 1.20397,
     -22.515, 1.89712, -22.515, 0.510826, 0.510826}, // M
    {-0.470004, -0.470004, -0.470004, -0.470004, -0.470004,
     -0.470004, -0.470004, -0.470004, -0.470004, -0.470004,
     -23.4959, 0.628609, -23.4959, -0.470004, 0.916291}, // N
    {-0.530628, -0.530628, -0.530628, -0.530628, -0.530628,
     -0.530628, 0.162519, 0.567984, 1.07881, 0.162519,
     -23.5565, -0.530628, -23.5565, -0.530628, 1.07881}, // P
    {-0.336472, -0.336472, -0.336472, -0.336472, -0.336472,
     -0.336472, -0.336472, -0.336472, 0.356675, 0.356675,
     -23.3623, 0.76214, -23.3623, -0.336472, -0.336472}, // Q
    {-0.530628, -0.530628, -0.530628, -0.530628, -0.530628,
     -0.530628, -0.530628, -0.530628, -0.530628, -0.530628,
     -23.5565, -0.530628, -23.5565, -0.530628, -0.530628}, // R
    {-0.955511, -0.955511, -0.955511, 0.430783, 0.430783,
     -0.955511, 0.653926, 1.75254, 0.653926, 1.12393,
     0.653926, -0.262364, -0.262364, -0.955511, -0.955511}, // S
    {-0.0953102, -0.788457, 0.597837, -0.0953102, -0.0953102,
     -0.0953102, -0.0953102, -0.0953102, 0.820981, -0.788457,
     0.310155, -0.788457, -0.788457, -0.788457, -0.0953102}, // T
    {0.693147, 1.02962, -0.916291, 0.182322, -0.916291,
     0.470004, 1.02962, -0.916291, -0.916291, 0.470004,
     0.182322, -0.916291, -23.9421, -0.223144, -0.916291}, // V
    {0.916291, 0.916291, 0.916291, 0.916291, 0.916291,
     0.916291, 0.916291, 0.916291, 0.916291, 0.916291,
     -22.1096, 0.916291, -22.1096, 0.916291, 0.916291}, // W
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // X (?)
    {-0.262364, -0.262364, -0.262364, -0.262364, -0.262364,
     -0.262364, -0.262364, -0.262364, -0.262364, 0.836248,
     -23.2882, -0.262364, -23.2882, -0.262364, -0.262364}, // Y
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Z (?)
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // U (?)
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}  // * (?)
};


/// Slide a 15-residue window along a sequence and report the best-scoring
/// placement under the scoring matrix for the requested domain.
///
/// @param seq
///   Indexable sequence of residue codes; each code selects a matrix row.
///   Codes outside [0, 26) have no row and contribute 0 to the window score.
/// @param domain
///   CSignalSeq::eBacterial selects const_BacMat; any other value selects
///   const_EukMat.
/// @param max_pos
///   Upper bound on the reported position: only windows whose reported
///   position (window start + 12) does not exceed max_pos are examined.
/// @param pos
///   Receives the start of the best window plus 12, i.e. the position just
///   before the predicted cut.  The earliest window wins a tie.
/// @param score
///   Receives the score of the best window.
///
/// If no window can be placed (sequence shorter than 15 residues, or
/// max_pos < 12), score is set to -1e6 and pos to 0.
template<class Seq>
void x_PredictSignalSeq(const Seq& seq, CSignalSeq::EDomain domain,
                        TSeqPos max_pos, TSeqPos& pos, double& score)
{
    const TSeqPos kWindowLen   = 15;  // number of matrix columns
    const TSeqPos kCutOffset   = 12;  // window column just before the cut
    const TSeqPos kNumResidues = 26;  // number of matrix rows
    const double  kNoScore     = -1e6;

    // Both subtractions below are unsigned; without this guard a short
    // sequence or a small max_pos would wrap around to a huge window count
    // and the scan would run off the end of the sequence.
    const TSeqPos seq_len = static_cast<TSeqPos>(seq.size());
    if (seq_len < kWindowLen  ||  max_pos < kCutOffset) {
        score = kNoScore;
        pos = 0;
        return;
    }

    const double (*Mat)[15] =
        (domain == CSignalSeq::eBacterial) ? const_BacMat : const_EukMat;

    // Last window start that both fits in the sequence and respects max_pos.
    const TSeqPos max_index = min(seq_len - kWindowLen, max_pos - kCutOffset);

    double max_score = kNoScore;
    TSeqPos max_loc = 0;
    for (TSeqPos i = 0;  i <= max_index;  i++) {
        double sum = 0;
        for (TSeqPos j = 0;  j < kWindowLen;  j++) {
            // Go through unsigned char so a negative char cannot become a
            // negative row index.
            const unsigned char residue =
                static_cast<unsigned char>(seq[i + j]);
            if (residue < kNumResidues) {
                sum += Mat[residue][j];
            }
        }
        if (sum > max_score) {
            max_score = sum;
            max_loc = i;
        }
    }
    score = max_score;
    pos = max_loc + kCutOffset;  // position before cut
}


/// Predict a signal sequence from residue codes stored in a string.
/// Forwards unchanged to x_PredictSignalSeq.
void CSignalSeq::Predict(const string& seq, EDomain domain,
                         TSeqPos max_pos, TSeqPos& pos, double& score)
{
    x_PredictSignalSeq(seq, domain, max_pos, pos, score);
}


/// Predict a signal sequence from residue codes stored in a vector<char>.
/// Forwards unchanged to x_PredictSignalSeq.
void CSignalSeq::Predict(const vector<char>& seq, EDomain domain,
                         TSeqPos max_pos, TSeqPos& pos, double& score)
{
    x_PredictSignalSeq(seq, domain, max_pos, pos, score);
}


/// Predict a signal sequence from a CSeqVector.
/// Works on a copy of the vector so the caller's coding is left untouched:
/// the copy is switched to NCBI coding, the whole sequence is extracted into
/// a string, and that string is scored.
/// NOTE(review): the extracted data is presumably NCBIstdaa (per the local
/// variable name) -- confirm against CSeqVector::SetNcbiCoding.
void CSignalSeq::Predict(const CSeqVector& seq, EDomain domain,
                         TSeqPos max_pos, TSeqPos& pos, double& score)
{
    string seq_ncbistdaa;
    CSeqVector vec(seq);
    vec.SetNcbiCoding();
    vec.GetSeqData(0, vec.size(), seq_ncbistdaa);
    x_PredictSignalSeq(seq_ncbistdaa, domain, max_pos, pos, score);
}


END_NCBI_SCOPE

/*
 * ===========================================================================
 * $Log: signal_seq.cpp,v $
 * Revision 1000.2  2004/06/01 18:10:56  gouriano
 * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.7
 *
 * Revision 1.7  2004/05/21 21:41:04  gorelenk
 * Added PCH ncbi_pch.hpp
 *
 * Revision 1.6  2004/03/16 19:40:09  vasilche
 * Made static const arrays accessible from template on ForteCC
 *
 * Revision 1.5  2004/03/15 12:30:19  dicuccio
 * Changed name of const arrays
 *
 * Revision 1.4  2004/03/12 19:59:31  dicuccio
 * Dropped static on private arrays as WorkShop doesn't let templates use such
 * arrays
 *
 * Revision 1.3  2004/03/11 17:27:16  dicuccio
 * Changed static member arrays to private static arrays
 *
 * Revision 1.2  2003/09/10 17:55:04  ucko
 * Add a cast to fix 64-bit compilation.
 *
 * Revision 1.1  2003/09/10 15:31:34  jcherry
 * Initial version
 *
 * ===========================================================================
 */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -