📄 gene_finder.hpp
字号:
/* * =========================================================================== * PRODUCTION $Log: gene_finder.hpp,v $ * PRODUCTION Revision 1000.1 2003/11/21 21:31:53 gouriano * PRODUCTION PRODUCTION: UPGRADED [ORIGINAL] Dev-tree R1.2 * PRODUCTION * =========================================================================== */#ifndef __GENE_FINDER__HPP#define __GENE_FINDER__HPP/* $Id: gene_finder.hpp,v 1000.1 2003/11/21 21:31:53 gouriano Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Authors: Alexandre Souvorov * * File Description: * */#include <corelib/ncbistd.hpp>#include <corelib/ncbi_limits.hpp>#include <algo/gnomon/gnomon_exception.hpp>#include <string>#include <vector>#include <list>#include <set>#include <algorithm>#include <math.h>BEGIN_NCBI_SCOPEtypedef vector<char> CVec;typedef vector<int> IVec;typedef vector<double> DVec;struct IPair : pair<int,int> { IPair(int f, int s) : pair<int,int>(f,s) {} bool operator<(const IPair& p) const { return second < p.first; } bool operator>(const IPair& p) const { return first > p.second; } bool Intersect(const IPair& p) const { return !(*this < p || *this > p); } bool Include(int i) const { return (i >= first && i <= second); }};enum Nucleotides { nA, nC, nG, nT, nN };enum { Plus, Minus };extern const double BadScore;extern const double LnHalf;extern const double LnThree;extern const int toMinus[5];extern const int TooFarLen;extern const char* aa_table;template<int order> class MarkovChain{public: void InitScore(CNcbiIfstream& from); double Score(const int* seq) const { return next[*(seq-order)].Score(seq); }private: typedef MarkovChain<order> Type; friend class MarkovChain<order+1>; void Init(CNcbiIfstream& from); void Average(Type& mc0, Type& mc1, Type& mc2, Type& mc3); void toScore(); MarkovChain<order-1> next[5];};template<> class MarkovChain<0>{public: void InitScore(CNcbiIfstream& from); double Score(const int* seq) const { return score[*seq]; }private: typedef MarkovChain<0> Type; friend class MarkovChain<1>; void Init(CNcbiIfstream& from); void Average(Type& mc0, Type& mc1, Type& mc2, Type& mc3); void toScore(); double score[5];};template<int order> class MarkovChainArray{public: void InitScore(int l, CNcbiIfstream& from); double Score(const int* seq) const;private: int length; vector< MarkovChain<order> > mc;};class InputModel{public: virtual ~InputModel() = 0;protected: static void Error(const string& label) { NCBI_THROW(CGnomonException, eGenericError, label); } static pair<int,int> FindContent(CNcbiIfstream& from, const string& label, int cgcontent);};//Terminal's score is located on the last position of the left stateclass Terminal : public InputModel{public: int InExon() const { return inexon; } int InIntron() const { return inintron; } int Left() const { return left; } int Right() const { return right; } virtual double Score(const IVec& seq, int i) const = 0; ~Terminal() {}protected: int inexon, inintron, left, right;};class MDD_Donor : public Terminal{public: MDD_Donor(const string& file, int cgcontent); double Score(const IVec& seq, int i) const;private: IVec position, consensus; vector< MarkovChainArray<0> > matrix;};template<int order> class WAM_Donor : public Terminal{public: WAM_Donor(const string& file, int cgcontent); double Score(const IVec& seq, int i) const;private: MarkovChainArray<order> matrix;};template<int order> class WAM_Acceptor : public Terminal{public: WAM_Acceptor(const string& file, int cgcontent); double Score(const IVec& seq, int i) const;private: MarkovChainArray<order> matrix;};class WMM_Start : public Terminal{public: WMM_Start(const string& file, int cgcontent); double Score(const IVec& seq, int i) const;private: MarkovChainArray<0> matrix;};class WAM_Stop : public Terminal{public: WAM_Stop(const string& file, int cgcontent); double Score(const IVec& seq, int i) const;private: MarkovChainArray<1> matrix;};class CodingRegion : public InputModel{public: virtual double Score(const IVec& seq, int i, int codonshift) const = 0; ~CodingRegion() {}};template<int order> class MC3_CodingRegion : public CodingRegion{public: MC3_CodingRegion(const string& file, int cgcontent); double Score(const IVec& seq, int i, int codonshift) const;private: MarkovChain<order> matrix[3];};class NonCodingRegion : public InputModel{public: virtual double Score(const IVec& seq, int i) const = 0; ~NonCodingRegion() {}};template<int order> class MC_NonCodingRegion : public NonCodingRegion{public: MC_NonCodingRegion(const string& file, int cgcontent); double Score(const IVec& seq, int i) const;private: MarkovChain<order> matrix;};class NullRegion : public NonCodingRegion{public: double Score(const IVec& seq, int i) const { return 0; };};class AlignVec : public vector<IPair>{public: typedef vector<IPair>::iterator It; typedef vector<IPair>::const_iterator ConstIt; enum { Prot, EST, mRNA, RefSeq, RefSeqBest, Wall}; AlignVec(int s = Plus, int i = 0, int t = EST, IPair cdl = IPair(-1,-1)) : type(t), strand(s), id(i), limits(numeric_limits<int>::max(),0), cds_limits(cdl), score(BadScore) {} void Insert(IPair p); void Erase(int i); IPair Limits() const { return limits; } IPair CdsLimits() const { return cds_limits; } void SetCdsLimits(IPair p) { cds_limits = p; } bool Intersect(const AlignVec& a) const { return limits.Intersect(a.limits); } void SetStrand(int s) { strand = s; } int Strand() const { return strand; } void SetType(int t) { type = t; } int Type() const { return type; } void SetID(int i) { id = i; } int ID() const { return id; } bool operator<(const AlignVec& a) const { return limits < a.limits; } void Init(); void SetScore(double s) { score = s; } double Score() const { return score; } void Extend(const AlignVec& a); private: int type, strand, id; IPair limits, cds_limits; double score;};CNcbiIstream& operator>>(CNcbiIstream& s, AlignVec& a);CNcbiOstream& operator<<(CNcbiOstream& s, const AlignVec& a);class CCluster : public list<AlignVec>{public: typedef list<AlignVec>::iterator It; typedef list<AlignVec>::const_iterator ConstIt; CCluster(int f = numeric_limits<int>::max(), int s = 0, int t = AlignVec::Prot) : limits(f,s), type(t) {} void Insert(const AlignVec& a); void Insert(const CCluster& c); IPair Limits() const { return limits; } int Type() const { return type; } bool operator<(const CCluster& c) const { return limits < c.limits; } void Init(int first, int second, int t);private: IPair limits; int type;};CNcbiIstream& operator>>(CNcbiIstream& s, CCluster& c);CNcbiOstream& operator<<(CNcbiOstream& s, const CCluster& c);class CClusterSet : public set<CCluster>{public: typedef set<CCluster>::iterator It; typedef set<CCluster>::const_iterator ConstIt; CClusterSet() {} CClusterSet(string c) : contig(c) {} void InsertAlignment(const AlignVec& a); void InsertCluster(CCluster c); string Contig() const { return contig; } void Init(string cnt);private: string contig;};CNcbiIstream& operator>>(CNcbiIstream& s, CClusterSet& cls);CNcbiOstream& operator<<(CNcbiOstream& s, const CClusterSet& cls);class CFrameShiftInfo{public: CFrameShiftInfo(int l, bool is_i, char i_v = 0) : loc(l), is_insert(is_i), insert_value(i_v) {} int Loc() const { return loc; } bool IsInsertion() const { return is_insert; } bool IsDeletion() const { return !is_insert; } char InsertValue() const { return insert_value; } bool operator<(const CFrameShiftInfo& fsi) const { return loc < fsi.loc; }private: int loc; // location for deletion, insertion before location // incertion - when I INCERT a new base in sequence bool is_insert; char insert_value;};typedef vector<CFrameShiftInfo> TFrameShifts;class SeqScores{public: SeqScores (Terminal& a, Terminal& d, Terminal& stt, Terminal& stp, CodingRegion& cr, NonCodingRegion& ncr, NonCodingRegion& ing, CVec& sequence, int from, int to, const CClusterSet& cls, const TFrameShifts& initial_fshifts, bool repeats, bool leftwall, bool rightwall, string cntg); int Shift() const { return shift; } int AcceptorNumber(int strand) const { return anum[strand]; } int DonorNumber(int strand) const { return dnum[strand]; } int StartNumber(int strand) const { return sttnum[strand]; } int StopNumber(int strand) const { return stpnum[strand]; } double AcceptorScore(int i, int strand) const { return ascr[strand][i]; } double DonorScore(int i, int strand) const { return dscr[strand][i]; } double StartScore(int i, int strand) const { return sttscr[strand][i]; } double StopScore(int i, int strand) const { return stpscr[strand][i]; } Terminal& Acceptor() const { return acceptor; } Terminal& Donor() const { return donor; } Terminal& Start() const { return start; } Terminal& Stop() const { return stop; } const CClusterSet& Alignments() const { return cluster_set; } const TFrameShifts& SeqFrameShifts() const { return fshifts; } string Contig() const { return contig; } bool StopInside(int a, int b, int strand, int frame) const; bool OpenCodingRegion(int a, int b, int strand, int frame) const; double CodingScore(int a, int b, int strand, int frame) const; bool OpenNonCodingRegion(int a, int b, int strand) const; double NonCodingScore(int a, int b, int strand) const;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -