📄 cpgdemo.cpp
字号:
/* * =========================================================================== * PRODUCTION $Log: cpgdemo.cpp,v $ * PRODUCTION Revision 1000.2 2004/06/01 18:10:59 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.5 * PRODUCTION * =========================================================================== *//* $Id: cpgdemo.cpp,v 1000.2 2004/06/01 18:10:59 gouriano Exp $* ===========================================================================** PUBLIC DOMAIN NOTICE* National Center for Biotechnology Information** This software/database is a "United States Government Work" under the* terms of the United States Copyright Act. It was written as part of* the author's official duties as a United States Government employee and* thus cannot be copyrighted. This software/database is freely available* to the public for use. The National Library of Medicine and the U.S.* Government have not placed any restriction on its use or reproduction.** Although all reasonable efforts have been taken to ensure the accuracy* and reliability of the software and data, the NLM and the U.S.* Government do not and cannot warrant the performance or results that* may be obtained by using this software or data. The NLM and the U.S.* Government disclaim all warranties, express or implied, including* warranties of performance, merchantability or fitness for any particular* purpose.** Please cite the author in any work or product based on this material.** ===========================================================================** Author: Philip Johnson** File Description: cpgdemo -- demo program for cpg island finder** ===========================================================================*/#include <ncbi_pch.hpp>#include <algo/sequence/cpg.hpp>#include <corelib/ncbiapp.hpp>#include <corelib/ncbiargs.hpp>#include <corelib/ncbienv.hpp>#include <objmgr/object_manager.hpp>#include <objtools/data_loaders/genbank/gbloader.hpp>#include <objmgr/scope.hpp>#include <objmgr/seq_vector.hpp>USING_NCBI_SCOPE;USING_SCOPE(objects);class CCpGDemoApp : public CNcbiApplication {public: CCpGDemoApp(void) {DisableArgDescriptions();}; virtual void Init(void); virtual int Run(void);};/*---------------------------------------------------------------------------*/void CCpGDemoApp::Init(void){ auto_ptr<CArgDescriptions> argDescr(new CArgDescriptions); argDescr->AddDefaultKey("gc", "gcContent", "calibrated organism %GC " "content (ie. human: 0.5, rat: 0.48)", CArgDescriptions::eDouble, "0.5"); argDescr->AddDefaultKey("cpg", "obsexp", "observed / expected CpG ratio", CArgDescriptions::eDouble, "0.6"); argDescr->AddDefaultKey("win", "window_size", "width of sliding window", CArgDescriptions::eInteger, "200"); argDescr->AddDefaultKey("len", "min_length", "minimum length of an island", CArgDescriptions::eInteger, "500"); argDescr->AddOptionalKey("m", "merge_isles", "merge adjacent islands within the specified " "distance of each other", CArgDescriptions::eInteger); argDescr->AddOptionalKey("a", "accession", "Single accession", CArgDescriptions::eString); argDescr->AddOptionalKey("i", "infile", "List of accessions", CArgDescriptions::eInputFile); argDescr->AddExtra(0,99, "fasta files", CArgDescriptions::eInputFile); argDescr->SetUsageContext(GetArguments().GetProgramBasename(), "Scans sequences for CpG islands; uses algorithm based upon Takai & Jones, 2002. Output sent to stdout.\n", false); SetupArgDescriptions(argDescr.release());}//---------------------------------------------------------------------------// PRE : open output stream, populated CpG island struct// POST: <cpg start> <cpg stop> <%G + C> <obs/exp CpG>CNcbiOstream& operator<< (CNcbiOstream& o, SCpGIsland i){ unsigned int len = i.m_Stop - i.m_Start + 1; o << i.m_Start << "\t" << i.m_Stop << "\t" << (double) (i.m_C + i.m_G) / len << "\t" << (double) i.m_CG * len / (i.m_C * i.m_G); return o;};//---------------------------------------------------------------------------// PRE : accession to scan, scope in which to resolve accession, CArgs// containing cpg island-scanning parameters// POST: 0 if successful, any islands found in the given accession according// to the arguments have been sent to coutint ScanForCpGs(const string& acc, CScope &scope, const CArgs& args){ CSeq_id seq_id(acc); if (seq_id.Which() == CSeq_id::e_not_set) { cerr << "Invalid seq-id: '" << acc << "'" << endl; return 1; } CBioseq_Handle bioseq_handle = scope.GetBioseqHandle(seq_id); if (!bioseq_handle) { cerr << "Bioseq load FAILED." << endl; return 2; } CSeqVector sv = bioseq_handle.GetSeqVector(CBioseq_Handle::eCoding_Iupac); string seqString; seqString.reserve(sv.size()); sv.GetSeqData(0, sv.size(), seqString); CCpGIslands cpgIsles(seqString.data(), seqString.length(), args["win"].AsInteger(), args["len"].AsInteger(), args["gc"].AsDouble(), args["cpg"].AsDouble()); if (args["m"]) { cpgIsles.MergeIslesWithin(args["m"].AsInteger()); } ITERATE(CCpGIslands::TIsles, i, cpgIsles.GetIsles()) { cout << acc << "\t" << *i << endl; } return 0;}//---------------------------------------------------------------------------// PRE : fasta file to scan, scope in which to resolve accession, CArgs// containing cpg island-scanning parameters// POST: 0 if successful, any islands found in the given accession according// to the arguments have been sent to coutint ScanForCpGs(istream &infile, const CArgs &args){ string localID; infile >> localID; //skip rest of line infile.ignore(numeric_limits<int>::max(), '\n'); while (infile) { if (localID[0] != '>') { ERR_POST(Critical << "FASTA file garbled around '" <<localID<<"'"); return 1; } //read in nucleotides string seqString, lineBuff; while (!infile.eof() && infile.peek() != '>') { getline(infile, lineBuff); if (seqString.size() + lineBuff.size() > seqString.capacity()) seqString.reserve(seqString.capacity() * 2); seqString += lineBuff; } //scan CCpGIslands cpgIsles(seqString.data(), seqString.length(), args["win"].AsInteger(), args["len"].AsInteger(), args["gc"].AsDouble(), args["cpg"].AsDouble()); if (args["m"]) { cpgIsles.MergeIslesWithin(args["m"].AsInteger()); } //output ITERATE(CCpGIslands::TIsles, i, cpgIsles.GetIsles()) { cout << localID << "\t" << *i << endl; } infile >> localID; infile.ignore(numeric_limits<int>::max(), '\n'); } return 0;}/*---------------------------------------------------------------------------*/int CCpGDemoApp::Run(void){ const CArgs& args = GetArgs(); CRef<CObjectManager> objMgr(new CObjectManager); objMgr->RegisterDataLoader(*new CGBDataLoader("GENBANK"), CObjectManager::eDefault); CScope scope(*objMgr); scope.AddDefaults(); int retCode = 0; cout.precision(2); cout.setf(ios::fixed, ios::floatfield); cout << "# CpG islands. Win:" << args["win"].AsInteger() << "; Min len:" << args["len"].AsInteger() << "; Min %GC:" << args["gc"].AsDouble() << "; Min obs/exp CpG: " << args["cpg"].AsDouble(); if (args["m"]) { cout << "; Merge islands within: " << args["m"].AsInteger(); } cout << endl; cout << "# label\tisle_start\tisle_stop\t%GC\tobs/exp CpG" << endl; if (args["a"]) { retCode = ScanForCpGs(args["a"].AsString(), scope, args); } if (args["i"]) { istream &infile = args["i"].AsInputFile(); string acc; infile >> acc; while (infile.good()) { cerr << "Processing " << acc << endl; if (ScanForCpGs(acc, scope, args) != 0) { retCode = 3; } infile >> acc; } } for (unsigned int i = 1; i <= args.GetNExtra(); ++i) { if (!ScanForCpGs(args[i].AsInputFile(), args)) { retCode = 3; } } return retCode;}//---------------------------------------------------------------------------int main(int argc, char** argv){ CCpGDemoApp theApp; return theApp.AppMain(argc, argv, NULL, eDS_Default, 0);}/* * =========================================================================== * $Log: cpgdemo.cpp,v $ * Revision 1000.2 2004/06/01 18:10:59 gouriano * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.5 * * Revision 1.5 2004/05/21 21:41:04 gorelenk * Added PCH ncbi_pch.hpp * * Revision 1.4 2004/01/07 17:39:28 vasilche * Fixed include path to genbank loader. * * Revision 1.3 2003/12/12 20:06:34 johnson * accommodate MSVC 7 refactoring; also made more features accessible from * command line * * Revision 1.2 2003/06/17 15:35:12 johnson * remove stray char * * Revision 1.1 2003/06/17 15:12:24 johnson * initial revision * * =========================================================================== */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -