📄 gene_finder.hpp
字号:
bool OpenIntergenicRegion(int a, int b) const; bool InAlignment(int a, int b) const; double IntergenicScore(int a, int b, int strand) const; int SeqLen() const { return seq[0].size(); } bool SplittedStop(int id, int ia, int strand, int ph) const { return (dsplit[strand][ph][id]&asplit[strand][ph][ia]) ? true : false; } bool isStart(int i, int strand) const; bool isStop(int i, int strand) const; bool isAG(int i, int strand) const; bool isGT(int i, int strand) const; bool isConsensusIntron(int i, int j, int strand) const; const int* SeqPtr(int i, int strand) const; int SeqMap(int i, bool forwrd) const; // maps new coordinates to old coordinates, // if insertion gives next or previous point // depending on forwrd int RevSeqMap(int i, bool forwrd) const; // maps old coordinates to new coordinates, // if deletion gives next or previous point // depending on forwrdprivate: Terminal &acceptor, &donor, &start, &stop; CodingRegion &cdr; NonCodingRegion &ncdr, &intrg; const CClusterSet& cluster_set; TFrameShifts fshifts; IVec seq[2], laststop[2][3], notinexon[2][3], notinintron[2], notining; IVec seq_map, rev_seq_map; DVec ascr[2], dscr[2], sttscr[2], stpscr[2], ncdrscr[2], ingscr[2], cdrscr[2][3]; IVec asplit[2][2], dsplit[2][2]; IVec inalign; int anum[2], dnum[2], sttnum[2], stpnum[2]; int shift; string contig;};struct StateScores{ double score,branch,length,region,term;};template<class State> StateScores CalcStateScores(const State& st){ StateScores sc; if(st.NoLeftEnd()) { if(st.NoRightEnd()) sc.length = st.ThroughLengthScore(); else sc.length = st.InitialLengthScore(); } else { if(st.NoRightEnd()) sc.length = st.ClosingLengthScore(); else sc.length = st.LengthScore(); } sc.region = st.RgnScore(); sc.term = st.TermScore(); if(sc.term == BadScore) sc.term = 0; sc.score = st.Score(); if(st.LeftState()) sc.score -= st.LeftState()->Score(); sc.branch = sc.score-sc.length-sc.region-sc.term; return sc;}class Lorentz{public: bool Init(CNcbiIstream& from, const string& label); double Score(int l) const { return score[(l-1)/step]; } double ClosingScore(int l) const; int MinLen() const { return minl; } int MaxLen() const { return maxl; } double AvLen() const { return avlen; } double Through(int seqlen) const;private: int minl, maxl, step; double A, L, avlen, lnthrough; DVec score, clscore;};class HMM_State : public InputModel{public: HMM_State(int strn, int point); const HMM_State* LeftState() const { return leftstate; } const Terminal* TerminalPtr() const { return terminal; } void UpdateLeftState(const HMM_State& left) { leftstate = &left; } void UpdateScore(double scr) { score = scr; } int MaxLen() const { return numeric_limits<int>::max(); }; int MinLen() const; bool StopInside() const { return false; } bool InAlignment() const { return false; } int Strand() const { return strand; } bool isPlus() const { return (strand == Plus); } bool isMinus() const { return (strand == Minus); } double Score() const { return score; } int Start() const { return leftstate ? leftstate->stop+1 : 0; } bool NoRightEnd() const { return stop < 0; } bool NoLeftEnd() const { return leftstate == 0; } int Stop() const { return NoRightEnd() ? seqscr->SeqLen()-1 : stop; } int RegionStart() const; int RegionStop() const; virtual StateScores GetStateScores() const = 0; virtual string GetStateName() const = 0; static void SetSeqScores(const SeqScores& s) { seqscr = &s; }protected: int stop, strand; double score; const HMM_State* leftstate; const Terminal* terminal; static const SeqScores* seqscr;};class Intron; class Intergenic;class Exon : public HMM_State{public: static void Init(const string& file, int cgcontent); Exon(int strn, int point, int ph) : HMM_State(strn,point), phase(ph), prevexon(0), mscore(BadScore) {} int Phase() const { return phase; } bool StopInside() const; bool OpenRgn() const; double RgnScore() const; double DenScore() const { return 0; } double ThroughLengthScore() const { return BadScore; } double InitialLengthScore() const { return BadScore; } double ClosingLengthScore() const { return BadScore; } void UpdatePrevExon(const Exon& e); double MScore() const { return mscore; }protected: int phase; const Exon* prevexon; double mscore; static double firstphase[3], internalphase[3][3]; static Lorentz firstlen, internallen, lastlen, singlelen; static bool initialised;};class SingleExon : public Exon{public: ~SingleExon() {} SingleExon(int strn, int point); int MaxLen() const { return singlelen.MaxLen(); } int MinLen() const { return singlelen.MinLen(); } const SingleExon* PrevExon() const { return static_cast<const SingleExon*>(prevexon); } double LengthScore() const; double TermScore() const; double BranchScore(const HMM_State& next) const { return BadScore; } double BranchScore(const Intergenic& next) const; StateScores GetStateScores() const { return CalcStateScores(*this); } string GetStateName() const { return "SingleExon"; }};class FirstExon : public Exon{public: ~FirstExon() {} FirstExon(int strn, int ph, int point); int MaxLen() const { return firstlen.MaxLen(); } int MinLen() const { return firstlen.MinLen(); } const FirstExon* PrevExon() const { return static_cast<const FirstExon*>(prevexon); } double LengthScore() const; double TermScore() const; double BranchScore(const HMM_State& next) const { return BadScore; } double BranchScore(const Intron& next) const; StateScores GetStateScores() const { return CalcStateScores(*this); } string GetStateName() const { return "FirstExon"; }};class InternalExon : public Exon{public: ~InternalExon() {} InternalExon(int strn, int ph, int point); int MaxLen() const { return internallen.MaxLen(); } int MinLen() const { return internallen.MinLen(); } const InternalExon* PrevExon() const { return static_cast<const InternalExon*>(prevexon); } double LengthScore() const; double TermScore() const; double BranchScore(const HMM_State& next) const { return BadScore; } double BranchScore(const Intron& next) const; StateScores GetStateScores() const { return CalcStateScores(*this); } string GetStateName() const { return "InternalExon"; }};class LastExon : public Exon{public: ~LastExon() {} LastExon(int strn, int ph, int point); int MaxLen() const { return lastlen.MaxLen(); } int MinLen() const { return lastlen.MinLen(); } const LastExon* PrevExon() const { return static_cast<const LastExon*>(prevexon); } double LengthScore() const; double TermScore() const; double BranchScore(const HMM_State& next) const { return BadScore; } double BranchScore(const Intergenic& next) const; StateScores GetStateScores() const { return CalcStateScores(*this); } string GetStateName() const { return "LastExon"; }};class Intron : public HMM_State{public: static void Init(const string& file, int cgcontent, int seqlen); ~Intron() {} Intron(int strn, int ph, int point); static int MinIntron() { return intronlen.MinLen(); } // used for introducing frameshifts int MinLen() const { return intronlen.MinLen(); } int MaxLen() const { return intronlen.MaxLen(); } int Phase() const { return phase; } bool OpenRgn() const; double RgnScore() const { return 0; } // Intron scores are substructed from all others double TermScore() const; double DenScore() const { return lnDen[Phase()]; } double LengthScore() const; double ClosingLengthScore() const; double ThroughLengthScore() const { return lnThrough[Phase()]; } double InitialLengthScore() const; double BranchScore(const HMM_State& next) const { return BadScore; } double BranchScore(const LastExon& next) const; double BranchScore(const InternalExon& next) const; bool SplittedStop() const; StateScores GetStateScores() const { return CalcStateScores(*this); } string GetStateName() const { return "Intron"; }protected: int phase; static double lnThrough[3], lnDen[3]; static double lnTerminal, lnInternal; static Lorentz intronlen; static bool initialised;};class Intergenic : public HMM_State{public: static void Init(const string& file, int cgcontent, int seqlen); ~Intergenic() {} Intergenic(int strn, int point); bool OpenRgn() const; double RgnScore() const; double TermScore() const; double DenScore() const { return lnDen; } double LengthScore() const { return intergeniclen.Score(Stop()-Start()+1); } double ClosingLengthScore() const { return intergeniclen.ClosingScore(Stop()-Start()+1); } double ThroughLengthScore() const { return lnThrough; } double InitialLengthScore() const { return lnDen+ClosingLengthScore(); } double BranchScore(const HMM_State& next) const { return BadScore; } double BranchScore(const FirstExon& next) const; double BranchScore(const SingleExon& next) const; bool InAlignment() const; StateScores GetStateScores() const { return CalcStateScores(*this); } string GetStateName() const { return "Intergenic"; }protected: static double lnThrough, lnDen; static double lnSingle, lnMulti; static Lorentz intergeniclen; static bool initialised;};template<class T> class ParseVec : public vector<T>{public: ParseVec() : num(-1) {} int num;};class Gene;class Parse{public: Parse(const SeqScores& ss); const HMM_State* Path() const { return path; } int PrintGenes(CNcbiOstream& to = cout, CNcbiOstream& toprot = cout, bool complete = false) const; void PrintInfo() const; list<Gene> GetGenes() const; //typedef list<Gene>::iterator GenIt;private: const SeqScores& seqscr; const HMM_State* path; ParseVec<Intergenic> igplus, igminus; ParseVec<Intron> inplus[3], inminus[3]; ParseVec<FirstExon> feplus[3], feminus; ParseVec<InternalExon> ieplus[3], ieminus[3]; ParseVec<LastExon> leplus, leminus[3]; ParseVec<SingleExon> seplus, seminus;};class ExonData{ friend list<Gene> Parse::GetGenes() const;public: ExonData(int stt, int stp, int tp) : start(stt), stop(stp), type(tp) {} int Start() const { return start; } int Stop() const { return stop; } int Type() const { return type; } const set<int>& ChainID() const { return chain_id; } const set<int>& ProtID() const { return prot_id; } const TFrameShifts& ExonFrameShifts() const { return fshifts; } bool Identical(const ExonData& ed) const { return (ed.start == start && ed.stop == stop); } bool operator<(const ExonData& ed) const { return (stop < ed.start); } enum {Cds, Utr};private: int start, stop, type; set<int> chain_id, prot_id; TFrameShifts fshifts;};class Gene : public vector<ExonData>{ friend list<Gene> Parse::GetGenes() const;public: Gene(int s, bool l = true, bool r = true, int csf = 0) : strand(s), leftend(l), rightend(r), cds_shift(csf) {} int Strand() const { return strand; } int CDS_Shift() const { return cds_shift; } const IVec& CDS() const { return cds; } bool LeftComplete() const { return leftend; } bool RightComplete() const { return rightend; } bool Complete() const { return (leftend && rightend); }private: int strand, cds_shift; bool leftend, rightend; IVec cds;};void LogicalCheck(const HMM_State& st, const SeqScores& ss);END_NCBI_SCOPE#include "models.hpp"#include "states.hpp"/* * =========================================================================== * $Log: gene_finder.hpp,v $ * Revision 1000.1 2003/11/21 21:31:53 gouriano * PRODUCTION: UPGRADED [ORIGINAL] Dev-tree R1.2 * * Revision 1.2 2003/11/06 15:02:21 ucko * Use iostream interface from ncbistre.hpp for GCC 2.95 compatibility. * * Revision 1.1 2003/10/24 15:07:25 dicuccio * Initial revision * * =========================================================================== */#endif // __GENE_FINDER__HPP
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -