📄 gnomon.cpp
字号:
/* * =========================================================================== * PRODUCTION $Log: gnomon.cpp,v $ * PRODUCTION Revision 1000.2 2004/06/01 18:08:33 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.4 * PRODUCTION * =========================================================================== *//* $Id: gnomon.cpp,v 1000.2 2004/06/01 18:08:33 gouriano Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Authors: Mike DiCuccio * * File Description: * */#include <ncbi_pch.hpp>#include <algo/gnomon/gnomon.hpp>#include "gene_finder.hpp"#include <objects/seqloc/Seq_loc.hpp>#include <objects/seqloc/Seq_id.hpp>#include <objects/seqloc/Packed_seqint.hpp>#include <objects/seqloc/Seq_interval.hpp>#include <objects/seqfeat/Seq_feat.hpp>#include <objects/seqfeat/RNA_ref.hpp>#include <objects/seqfeat/Cdregion.hpp>BEGIN_NCBI_SCOPEUSING_SCOPE(ncbi::objects);CGnomon::CGnomon(): m_Repeats(false), m_ScanRange(CRange<TSeqPos>::GetWhole()){}CGnomon::~CGnomon(){}//// set our sequence information//void CGnomon::SetSequence(const vector<char>& seq){ m_Seq = seq;}void CGnomon::SetSequence(const string& str){ m_Seq.clear(); m_Seq.reserve(str.length()); ITERATE (string, iter, str) { m_Seq.push_back(*iter); }}void CGnomon::SetSequence(const CSeqVector& vec){ CSeqVector my_vec(vec); my_vec.SetCoding(CSeq_data::e_Iupacna); m_Seq.clear(); m_Seq.reserve(my_vec.size()); CSeqVector_CI iter(my_vec); for ( ; iter; ++iter) { m_Seq.push_back(*iter); }}//// set the model data file// we save this as a string, as it is read multiple times//void CGnomon::SetModelData(const string& data_file){ m_ModelFile = data_file;}//// set the a priori information//void CGnomon::SetAprioriInfo(const string& file){ m_Clusters.reset(new CClusterSet()); if (file.empty()) { return; } CNcbiIfstream from(file.c_str()); from >> *m_Clusters;}//// set the frame shift information//void CGnomon::SetFrameShiftInfo(const string& file){ m_Fshifts.reset(new TFrameShifts()); if (file.empty()) { return; } CNcbiIfstream from(file.c_str()); int loc; while(from >> loc) { char is_i; char c = 'N'; from >> is_i; if(is_i == '+') { from >> c; } m_Fshifts->push_back(CFrameShiftInfo(loc, (is_i == '+'), c)); }}//// set the repeats flag//void CGnomon::SetRepeats(bool b){ m_Repeats = b;}//// set our scan range//void CGnomon::SetScanRange(const CRange<TSeqPos>& range){ m_ScanRange = range;}//// set the scan start position//void CGnomon::SetScanFrom(TSeqPos from){ m_ScanRange.SetFrom(from);}//// set the scan stop position//void CGnomon::SetScanTo(TSeqPos to){ m_ScanRange.SetTo(to);}//// helper function - convert a vector<IPair> into a compact CSeq_loc//static inlineCRef<CSeq_loc> s_ExonDataToLoc(const vector<IPair>& vec, ENa_strand strand, CSeq_id& id){ CRef<CSeq_loc> loc(new CSeq_loc()); CPacked_seqint::Tdata data; ITERATE (vector<IPair>, iter, vec) { CRef<CSeq_interval> ival(new CSeq_interval()); ival->SetId(id); ival->SetStrand(strand); ival->SetFrom(iter->first); ival->SetTo (iter->second); data.push_back(ival); } if (data.size() == 1) { loc->SetInt(*data.front()); } else { loc->SetPacked_int().Set().swap(data); } return loc;}//// Run() performs all the housekeeping necessary for GNOMON to work//void CGnomon::Run(void){ // compute the GC content of the sequence TSeqPos gc_content = 0; ITERATE (vector<char>, iter, m_Seq) { int c = toupper(*iter); if (c == 'C' || c == 'G') { ++gc_content; } } gc_content = static_cast<int> (gc_content*100.0 / (m_Seq.size()) + 0.4999999); // // determine our scan range // TSeqPos from = m_ScanRange.GetFrom(); TSeqPos to = m_ScanRange.GetTo(); if (m_ScanRange == CRange<TSeqPos>::GetWhole()) { from = 0; to = m_Seq.size(); } // TSeqPos is unsigned, so from is always > 0 to = min(to, (TSeqPos)m_Seq.size()); if (from > to) { swap (from, to); } // // GNOMON setup // auto_ptr<Terminal> donorp; if(m_ModelFile.find("Human.inp") == m_ModelFile.length() - 9) { donorp.reset(new MDD_Donor(m_ModelFile, gc_content)); } else { donorp.reset(new WAM_Donor<2>(m_ModelFile,gc_content)); } WAM_Acceptor<2> acceptor (m_ModelFile, gc_content); WMM_Start start (m_ModelFile, gc_content); WAM_Stop stop (m_ModelFile, gc_content); MC3_CodingRegion<5> cdr (m_ModelFile, gc_content); MC_NonCodingRegion<5> intron_reg (m_ModelFile, gc_content); MC_NonCodingRegion<5> intergenic_reg(m_ModelFile, gc_content); Intron::Init (m_ModelFile, gc_content, to - from + 1); Intergenic::Init(m_ModelFile, gc_content, to - from + 1); Exon::Init (m_ModelFile, gc_content); bool leftwall = true; bool rightwall = true; if ( !m_Clusters.get() ) { m_Clusters.reset(new CClusterSet()); } if ( !m_Fshifts.get() ) { m_Fshifts.reset(new TFrameShifts()); } SeqScores ss(acceptor, *donorp, start, stop, cdr, intron_reg, intergenic_reg, m_Seq, from, to - from + 1, *m_Clusters, *m_Fshifts, m_Repeats, leftwall, rightwall, m_ModelFile); HMM_State::SetSeqScores(ss); m_Annot.Reset(new CSeq_annot()); m_Annot->AddName("GNOMON gene scan output"); Parse parse(ss); list<Gene> genes = parse.GetGenes(); CRef<CSeq_id> id(new CSeq_id("lcl|gnomon")); CSeq_annot::C_Data::TFtable& ftable = m_Annot->SetData().SetFtable(); ITERATE (list<Gene>, it, genes) { const Gene& igene = *it; int strand = igene.Strand(); vector<IPair> mrna_vec; vector<IPair> cds_vec; for (int j = 0; j < igene.size(); ++j) { const ExonData& exon = igene[j]; int a = exon.Start(); int b = exon.Stop(); if (j == 0 || igene[j-1].Stop()+1 != a) { // new exon mrna_vec.push_back(IPair(a,b)); } else { // attaching cds-part to utr-part mrna_vec.back().second = b; } if (exon.Type() == ExonData::Cds) { cds_vec.push_back(IPair(a,b)); } } // stop-codon removal from cds if (strand == Plus && igene.RightComplete()) { cds_vec.back().second -= 3; } if (strand == Minus && igene.LeftComplete()) { cds_vec.front().first += 3; } // // create our mRNA CRef<CSeq_feat> feat_mrna(new CSeq_feat()); feat_mrna->SetData().SetRna().SetType(CRNA_ref::eType_mRNA); feat_mrna->SetLocation (*s_ExonDataToLoc(mrna_vec, (strand == Plus ? eNa_strand_plus : eNa_strand_minus), *id)); // // create the accompanying CDS CRef<CSeq_feat> feat_cds(new CSeq_feat); feat_cds->SetData().SetCdregion(); feat_cds->SetLocation (*s_ExonDataToLoc(cds_vec, (strand == Plus ? eNa_strand_plus : eNa_strand_minus), *id)); ftable.push_back(feat_mrna); ftable.push_back(feat_cds); }}CRef<CSeq_annot> CGnomon::GetAnnot(void) const{ return m_Annot;}END_NCBI_SCOPE/* * =========================================================================== * $Log: gnomon.cpp,v $ * Revision 1000.2 2004/06/01 18:08:33 gouriano * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.4 * * Revision 1.4 2004/05/21 21:41:03 gorelenk * Added PCH ncbi_pch.hpp * * Revision 1.3 2003/11/06 00:51:05 ucko * Adjust usage of min for platforms with 64-bit size_t. * * Revision 1.2 2003/11/05 21:15:24 ucko * Fixed usage of ITERATE. (m_Seq is a vector, not a string.) * * Revision 1.1 2003/10/24 15:07:25 dicuccio * Initial revision * * =========================================================================== */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -