📄 models.cpp
字号:
/* * =========================================================================== * PRODUCTION $Log: models.cpp,v $ * PRODUCTION Revision 1000.2 2004/06/01 18:08:35 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.3 * PRODUCTION * =========================================================================== *//* $Id: models.cpp,v 1000.2 2004/06/01 18:08:35 gouriano Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software / database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software / database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Authors: Mike DiCuccio * * File Description: * */#include <ncbi_pch.hpp>#include "gene_finder.hpp"BEGIN_NCBI_SCOPEconst double BadScore = -numeric_limits<double>::max();const double LnHalf = log(0.5);const double LnThree = log(3.0);const int toMinus[5] = { nT, nG, nC, nA, nN };const int TooFarLen = 500;const char* aa_table = "KNKNXTTTTTRSRSXIIMIXXXXXXQHQHXPPPPPRRRRRLLLLLXXXXXEDEDXAAAAAGGGGGVVVVVXXXXX * Y*YXSSSSS * CWCXLFLFXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";void MarkovChain<0>::InitScore(CNcbiIfstream& from){ Init(from); if (from) { toScore(); }}void MarkovChain<0>::Init(CNcbiIfstream& from){ from >> score[nA]; from >> score[nC]; from >> score[nG]; from >> score[nT]; score[nN] = 0.25; return; double sum = score[nA]+score[nC]+score[nG]+score[nT]; if (sum != 0) { score[nA] /= sum; score[nC] /= sum; score[nG] /= sum; score[nT] /= sum; }}void MarkovChain<0>::Average(Type& mc0, Type& mc1, Type& mc2, Type& mc3){ score[nA] = 0.25 * (mc0.score[nA]+mc1.score[nA]+mc2.score[nA]+mc3.score[nA]); score[nC] = 0.25 * (mc0.score[nC]+mc1.score[nC]+mc2.score[nC]+mc3.score[nC]); score[nG] = 0.25 * (mc0.score[nG]+mc1.score[nG]+mc2.score[nG]+mc3.score[nG]); score[nT] = 0.25 * (mc0.score[nT]+mc1.score[nT]+mc2.score[nT]+mc3.score[nT]); score[nN] = 0.25;}void MarkovChain<0>::toScore(){ score[nA] = (score[nA] <= 0) ? BadScore : log(4 * score[nA]); score[nC] = (score[nC] <= 0) ? BadScore : log(4 * score[nC]); score[nG] = (score[nG] <= 0) ? BadScore : log(4 * score[nG]); score[nT] = (score[nT] <= 0) ? BadScore : log(4 * score[nT]); score[nN] = (score[nN] <= 0) ? BadScore : log(4 * score[nN]);}pair<int,int> InputModel::FindContent(CNcbiIfstream& from, const string& label, int cgcontent){ string str; while (from >> str) { int low,high; if (str == label) { from >> str; if ( str != "CG:" ) { return make_pair(-1,-1); } from >> low >> high; if ( !from ) { return make_pair(-1,-1); } if ( cgcontent >= low && cgcontent < high) { return make_pair(low,high); } } } return make_pair(-1,-1);}InputModel::~InputModel(){}double MDD_Donor::Score(const IVec& seq, int i) const{ int first = i - left + 1; int last = i + right; if (first < 0 || last >= seq.size()) { return BadScore; // out of range } if (seq[i + 1] != nG || seq[i + 2] != nT) { return BadScore; // no GT } int mat = 0; for (int j = 0; j < position.size(); ++j) { if (seq[first + position[j]] != consensus[j]) { break; } ++mat; } return matrix[mat].Score(&seq[first]);}MDD_Donor::MDD_Donor(const string& file, int cgcontent){ string label = "[MDD_Donor]"; CNcbiIfstream from(file.c_str()); pair<int,int> cgrange = FindContent(from,label,cgcontent); if (cgrange.first < 0) { Error(label); } string str; from >> str; if ( str != "InExon:" ) { Error(label); } from >> inexon; if ( !from ) { Error(label); } from >> str; if ( str != "InIntron:" ) { Error(label); } from >> inintron; if ( !from ) { Error(label); } left = inexon; right = inintron; do { from >> str; if ( !from ) { Error(label); } if (str == "Position:") { int i; from >> i; if ( !from ) { Error(label); } position.push_back(i); from >> str; if ( !from ) { Error(label); } if ( str != "Consensus:" ) { Error(label); } char c; from >> c; if ( !from ) { Error(label); } i = ACGT(c); if (i == nN) { Error(label); } consensus.push_back(i); } matrix.push_back(MarkovChainArray<0>()); matrix.back().InitScore(inexon + inintron,from); if ( !from ) { Error(label); } } while (str != "End");}WMM_Start::WMM_Start(const string& file, int cgcontent){ string label = "[WMM_Start]"; CNcbiIfstream from(file.c_str()); pair<int,int> cgrange = FindContent(from,label,cgcontent); if (cgrange.first < 0) { Error(label); } string str; from >> str; if ( str != "InExon:" ) { Error(label); } from >> inexon; if ( !from ) { Error(label); } from >> str; if ( str != "InIntron:" ) { Error(label); } from >> inintron; if ( !from ) { Error(label); } left = inintron; right = inexon; matrix.InitScore(inexon + inintron,from); if ( !from ) { Error(label); }}double WMM_Start::Score(const IVec& seq, int i) const{ int first = i - left + 1; int last = i + right; if (first < 0 || last >= seq.size()) { return BadScore; // out of range } if (seq[i - 2] != nA || seq[i - 1] != nT || seq[i] != nG) { return BadScore; // no ATG } return matrix.Score(&seq[first]);}WAM_Stop::WAM_Stop(const string& file, int cgcontent){ string label = "[WAM_Stop_1]"; CNcbiIfstream from(file.c_str()); pair<int,int> cgrange = FindContent(from,label,cgcontent); if (cgrange.first < 0) { Error(label); } string str; from >> str; if ( str != "InExon:" ) { Error(label); } from >> inexon; if ( !from ) { Error(label); } from >> str; if ( str != "InIntron:" ) { Error(label); } from >> inintron; if ( !from ) { Error(label); } left = inexon; right = inintron; matrix.InitScore(inexon + inintron,from); if ( !from ) { Error(label); }}double WAM_Stop::Score(const IVec& seq, int i) const{ int first = i - left + 1; int last = i + right; if (first - 1 < 0 || last >= seq.size()) { return BadScore; } // out of range if ((seq[i + 1] != nT || seq[i + 2] != nA || seq[i + 3] != nA) && (seq[i + 1] != nT || seq[i + 2] != nA || seq[i + 3] != nG) && (seq[i + 1] != nT || seq[i + 2] != nG || seq[i + 3] != nA)) { return BadScore; // no stop - codon } return matrix.Score(&seq[first]);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -