📄 blast2seq.cpp
字号:
/* * =========================================================================== * PRODUCTION $Log: blast2seq.cpp,v $ * PRODUCTION Revision 1000.5 2004/06/01 18:06:32 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.46 * PRODUCTION * =========================================================================== *//* $Id: blast2seq.cpp,v 1000.5 2004/06/01 18:06:32 gouriano Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Authors: Christiam Camacho * *//** @file blast2seq.cpp * Main driver for blast2sequences C++ interface */static char const rcsid[] = "$Id: blast2seq.cpp,v 1000.5 2004/06/01 18:06:32 gouriano Exp $";#include <ncbi_pch.hpp>#include <corelib/ncbiapp.hpp>#include <corelib/ncbienv.hpp>#include <corelib/ncbiargs.hpp>#include <serial/iterator.hpp>#include <objmgr/object_manager.hpp>#include <objmgr/scope.hpp>#include <objmgr/seq_vector.hpp>#include <objtools/data_loaders/genbank/gbloader.hpp>#include <objmgr/util/sequence.hpp>#include <corelib/ncbitime.hpp>#include <objtools/readers/fasta.hpp>#include <algo/blast/api/bl2seq.hpp>#include <algo/blast/api/blast_options.hpp>#include <algo/blast/api/blast_nucl_options.hpp>#include "blast_input.hpp"#include <objects/seqalign/Seq_align_set.hpp>USING_NCBI_SCOPE;USING_SCOPE(blast);USING_SCOPE(objects);//////////////////////////////////////////////////////////////////////////////// CBlast2seqApplication: command line blast2sequences application/// @todo Implement formatting/// @todo refactor command line options, so that only those relevant to a/// particular program are shown (e.g: cvs -H command). This should be/// reusable by all BLAST command line clientsclass CBlast2seqApplication : public CNcbiApplication{private: virtual void Init(void); virtual int Run(void); virtual void Exit(void); void InitObjMgr(void); EProgram GetBlastProgramNum(const string& prog); CBlastOptionsHandle* ProcessCommandLineArgs() THROWS((CBlastException));#ifndef NDEBUG FILE* GetOutputFilePtr(void); // needed for debugging only#endif CRef<CObjectManager> m_ObjMgr; CRef<CScope> m_Scope;};void CBlast2seqApplication::Init(void){ HideStdArgs(fHideLogfile | fHideConffile | fHideVersion); auto_ptr<CArgDescriptions> arg_desc(new CArgDescriptions); arg_desc->SetUsageContext(GetArguments().GetProgramBasename(), "Compares 2 sequence using the BLAST algorithm"); // Program type arg_desc->AddKey("program", "p", "Type of BLAST program", CArgDescriptions::eString); arg_desc->SetConstraint ("program", &(*new CArgAllow_Strings, "blastp", "blastn", "blastx", "tblastn", "tblastx")); // Query sequence arg_desc->AddDefaultKey("query", "q", "Query file name", CArgDescriptions::eInputFile, "-", CArgDescriptions::fPreOpen); // Subject(s) sequence(s) arg_desc->AddKey("subject", "s", "Subject(s) file name", CArgDescriptions::eInputFile, CArgDescriptions::fPreOpen); // Copied from blast_app arg_desc->AddDefaultKey("strand", "strand", "Query strands to search: 1 forward, 2 reverse, 0,3 both", CArgDescriptions::eInteger, "0"); arg_desc->SetConstraint("strand", new CArgAllow_Integers(0,3)); arg_desc->AddDefaultKey("filter", "filter", "Filtering option", CArgDescriptions::eString, "T"); arg_desc->AddDefaultKey("lcase", "lcase", "Should lower case be masked?", CArgDescriptions::eBoolean, "F"); arg_desc->AddDefaultKey("lookup", "lookup", "Type of lookup table: 0 default, 1 megablast", CArgDescriptions::eInteger, "0"); arg_desc->AddDefaultKey("matrix", "matrix", "Scoring matrix name", CArgDescriptions::eString, "BLOSUM62"); arg_desc->AddDefaultKey("mismatch", "penalty", "Penalty score for a mismatch", CArgDescriptions::eInteger, "0"); arg_desc->AddDefaultKey("match", "reward", "Reward score for a match", CArgDescriptions::eInteger, "0"); arg_desc->AddDefaultKey("word", "wordsize", "Word size", CArgDescriptions::eInteger, "0"); arg_desc->AddDefaultKey("templen", "templen", "Discontiguous word template length", CArgDescriptions::eInteger, "0"); arg_desc->SetConstraint("templen", &(*new CArgAllow_Strings, "0", "16", "18", "21")); arg_desc->AddDefaultKey("templtype", "templtype", "Discontiguous word template type", CArgDescriptions::eInteger, "0"); arg_desc->SetConstraint("templtype", new CArgAllow_Integers(0,2)); arg_desc->AddDefaultKey("thresh", "threshold", "Score threshold for neighboring words", CArgDescriptions::eInteger, "0"); arg_desc->AddDefaultKey("window","window", "Window size for two-hit extension", CArgDescriptions::eInteger, "0"); arg_desc->AddDefaultKey("scantype", "scantype", "Method for scanning the database: 0 traditional, 1 AG", CArgDescriptions::eInteger, "1"); arg_desc->SetConstraint("scantype", new CArgAllow_Integers(0,1)); arg_desc->AddDefaultKey("varword", "varword", "Should variable word size be used?", CArgDescriptions::eBoolean, "F"); arg_desc->AddDefaultKey("stride","stride", "Database scanning stride", CArgDescriptions::eInteger, "0"); arg_desc->AddDefaultKey("xungap", "xungapped", "X-dropoff value for ungapped extensions", CArgDescriptions::eDouble, "0"); arg_desc->AddDefaultKey("ungapped", "ungapped", "Perform only an ungapped alignment search?", CArgDescriptions::eBoolean, "F"); arg_desc->AddDefaultKey("greedy", "greedy", "Use greedy algorithm for gapped extensions: 0 no, 1 one-step, 2 two-step, 3 two-step with ungapped", CArgDescriptions::eInteger, "0"); arg_desc->AddDefaultKey("gopen", "gapopen", "Penalty for opening a gap", CArgDescriptions::eInteger, "0"); arg_desc->AddDefaultKey("gext", "gapext", "Penalty for extending a gap", CArgDescriptions::eInteger, "0"); arg_desc->AddDefaultKey("xgap", "xdrop", "X-dropoff value for preliminary gapped extensions", CArgDescriptions::eDouble, "0"); arg_desc->AddDefaultKey("xfinal", "xfinal", "X-dropoff value for final gapped extensions with traceback", CArgDescriptions::eDouble, "0"); arg_desc->AddDefaultKey("evalue", "evalue", "E-value threshold for saving hits", CArgDescriptions::eDouble, "0"); arg_desc->AddDefaultKey("searchsp", "searchsp", "Virtual search space to be used for statistical calculations", CArgDescriptions::eDouble, "0"); arg_desc->AddDefaultKey("perc", "percident", "Percentage of identities cutoff for saving hits", CArgDescriptions::eDouble, "0"); arg_desc->AddDefaultKey("descr", "descriptions", "How many matching sequence descriptions to show?", CArgDescriptions::eInteger, "500"); arg_desc->AddDefaultKey("align", "alignments", "How many matching sequence alignments to show?", CArgDescriptions::eInteger, "250"); arg_desc->AddDefaultKey("out", "out", "File name for writing output", CArgDescriptions::eOutputFile, "-", CArgDescriptions::fPreOpen); arg_desc->AddDefaultKey("format", "format", "How to format the results?", CArgDescriptions::eInteger, "0"); arg_desc->AddDefaultKey("html", "html", "Produce HTML output?", CArgDescriptions::eBoolean, "F"); arg_desc->AddDefaultKey("gencode", "gencode", "Query genetic code", CArgDescriptions::eInteger, "0"); arg_desc->AddDefaultKey("dbgencode", "dbgencode", "Database genetic code", CArgDescriptions::eInteger, "0"); arg_desc->AddDefaultKey("maxintron", "maxintron", "Longest allowed intron length for linking HSPs", CArgDescriptions::eInteger, "0"); arg_desc->AddDefaultKey("frameshift", "frameshift", "Frame shift penalty (blastx only)", CArgDescriptions::eInteger, "0"); arg_desc->AddOptionalKey("asnout", "seqalignasn", "File name for writing the seqalign results in ASN.1 form", CArgDescriptions::eOutputFile); // Debug parameters arg_desc->AddFlag("trace", "Tracing enabled?", true); SetupArgDescriptions(arg_desc.release());}void CBlast2seqApplication::InitObjMgr(void){ m_ObjMgr.Reset(new CObjectManager); m_ObjMgr->RegisterDataLoader(*new CGBDataLoader, CObjectManager::eDefault);}EProgramCBlast2seqApplication::GetBlastProgramNum(const string& prog){ if (prog == "blastp") return eBlastp; if (prog == "blastn") return eBlastn; if (prog == "blastx") return eBlastx; if (prog == "tblastn") return eTblastn; if (prog == "tblastx") return eTblastx; return eBlastProgramMax;}CBlastOptionsHandle*CBlast2seqApplication::ProcessCommandLineArgs() THROWS((CBlastException)){ CArgs args = GetArgs(); EProgram prog = GetBlastProgramNum(args["program"].AsString()); CBlastOptionsHandle* retval = CBlastOptionsFactory::Create(prog); if ( !retval ) { NCBI_THROW(CBlastException, eOutOfMemory, ""); } CBlastOptions& opt = retval->SetOptions(); if (args["strand"].AsInteger()) { switch (args["strand"].AsInteger()) { case 1: opt.SetStrandOption(eNa_strand_plus); break; case 2: opt.SetStrandOption(eNa_strand_minus); break; case 3: opt.SetStrandOption(eNa_strand_both); break; default: abort(); } } opt.SetFilterString(args["filter"].AsString().c_str()); // FIXME: Handle lcase masking if (args["lookup"].AsInteger()) { opt.SetLookupTableType(args["lookup"].AsInteger()); } if (args["matrix"]) { opt.SetMatrixName(args["matrix"].AsString().c_str()); } if (args["mismatch"].AsInteger()) { opt.SetMismatchPenalty(args["mismatch"].AsInteger()); } if (args["match"].AsInteger()) { opt.SetMatchReward(args["match"].AsInteger()); } if (args["word"].AsInteger()) { opt.SetWordSize(args["word"].AsInteger()); } if (args["templen"].AsInteger()) { opt.SetMBTemplateLength(args["templen"].AsInteger()); } if (args["templtype"].AsInteger()) { opt.SetMBTemplateType(args["templtype"].AsInteger()); } if (args["thresh"].AsInteger()) { opt.SetWordThreshold(args["thresh"].AsInteger()); } if (args["window"].AsInteger()) { opt.SetWindowSize(args["window"].AsInteger()); } // The next 3 apply to nucleotide searches only string program = args["program"].AsString(); if (program == "blastn") { // Setting seed extension method involves changing the scanning // stride as well, which is handled in the derived // CBlastNucleotideOptionsHandle class, but not in the base // CBlastOptionsHandle class. CBlastNucleotideOptionsHandle* opts_handle = dynamic_cast<CBlastNucleotideOptionsHandle*>(retval); if (!args["templen"].AsInteger()) { opt.SetVariableWordSize(args["varword"].AsBoolean()); switch(args["scantype"].AsInteger()) { case 1: opts_handle->SetSeedExtensionMethod(eRightAndLeft); break; default: opts_handle->SetSeedExtensionMethod(eRight); break; } } else { // Discontiguous Mega BLAST: only one extension method. opts_handle->SetSeedExtensionMethod(eRight); } // Override the scan step value if it is set by user if (args["stride"].AsInteger()) { opt.SetScanStep(args["stride"].AsInteger()); }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -