📄 blast_app.cpp
字号:
/* * =========================================================================== * PRODUCTION $Log: blast_app.cpp,v $ * PRODUCTION Revision 1000.8 2004/06/01 18:06:35 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.49 * PRODUCTION * =========================================================================== */static char const rcsid[] = "$Id: blast_app.cpp,v 1000.8 2004/06/01 18:06:35 gouriano Exp $";/** ===========================================================================** PUBLIC DOMAIN NOTICE* National Center for Biotechnology Information** This software/database is a "United States Government Work" under the* terms of the United States Copyright Act. It was written as part of* the author's offical duties as a United States Government employee and* thus cannot be copyrighted. This software/database is freely available* to the public for use. The National Library of Medicine and the U.S.* Government have not placed any restriction on its use or reproduction.** Although all reasonable efforts have been taken to ensure the accuracy* and reliability of the software and data, the NLM and the U.S.* Government do not and cannot warrant the performance or results that* may be obtained by using this software or data. The NLM and the U.S.* Government disclaim all warranties, express or implied, including* warranties of performance, merchantability or fitness for any particular* purpose.** Please cite the author in any work or product based on this material.** ===========================================================================*//*****************************************************************************File name: blast_app.cppAuthor: Ilya DondoshanskyContents: C++ driver for running BLAST******************************************************************************/#include <ncbi_pch.hpp>#include <corelib/ncbiapp.hpp>#include <corelib/ncbifile.hpp>#include <objects/seq/seq__.hpp>#include <objects/seq/seqport_util.hpp>#include <objects/seqfeat/Genetic_code_table.hpp>#include <objmgr/object_manager.hpp>#include <objtools/data_loaders/blastdb/bdbloader.hpp>#include <objtools/data_loaders/genbank/gbloader.hpp>#include <objmgr/util/sequence.hpp>#include <algo/blast/api/blast_options.hpp>#include <algo/blast/api/blast_nucl_options.hpp>#include <algo/blast/api/db_blast.hpp>#include <algo/blast/api/blast_aux.hpp>#include "blast_input.hpp"#include <algo/blast/api/seqdb_src.hpp>#include <objtools/alnmgr/util/blast_format.hpp>#ifndef NCBI_C_TOOLKIT#define NCBI_C_TOOLKIT#endif// C include files#include <algo/blast/core/blast_setup.h>#include <algo/blast/core/blast_util.h>#include <algo/blast/core/lookup_wrap.h>#include <algo/blast/core/blast_engine.h>// For writing out seqalign only#include <ctools/asn_converter.hpp>#include <objalign.h>USING_NCBI_SCOPE;USING_SCOPE(blast);USING_SCOPE(objects);class CBlastApplication : public CNcbiApplication{private: virtual void Init(void); virtual int Run(void); virtual void Exit(void); void InitScope(void); void InitOptions(void); void SetOptions(const CArgs& args); void ProcessCommandLineArgs(CBlastOptionsHandle* opt, BlastSeqSrc* bssp, RPSInfo *rps_info); int BlastSearch(void); void RegisterBlastDbLoader(char* dbname, bool is_na); void FormatResults(CDbBlast& blaster, TSeqAlignVector& seqalignv); CRef<CObjectManager> m_ObjMgr; CRef<CScope> m_Scope;};void CBlastApplication::Init(void){ HideStdArgs(fHideLogfile | fHideConffile | fHideVersion); auto_ptr<CArgDescriptions> arg_desc(new CArgDescriptions); arg_desc->SetUsageContext(GetArguments().GetProgramBasename(), "basic local alignment search tool"); arg_desc->AddKey("program", "program", "Type of BLAST program", CArgDescriptions::eString); arg_desc->SetConstraint ("program", &(*new CArgAllow_Strings, "blastp", "blastn", "blastx", "tblastn", "tblastx", "rpsblast", "rpstblastn")); arg_desc->AddDefaultKey("query", "query", "Query file name", CArgDescriptions::eInputFile, "stdin"); arg_desc->AddKey("db", "database", "BLAST database name", CArgDescriptions::eString); arg_desc->AddDefaultKey("strand", "strand", "Query strands to search: 1 forward, 2 reverse, 0,3 both", CArgDescriptions::eInteger, "0"); arg_desc->SetConstraint("strand", new CArgAllow_Integers(0,3)); arg_desc->AddDefaultKey("filter", "filter", "Filtering option", CArgDescriptions::eString, "T"); arg_desc->AddDefaultKey("lcase", "lcase", "Should lower case be masked?", CArgDescriptions::eBoolean, "F"); arg_desc->AddDefaultKey("lookup", "lookup", "Type of lookup table: 0 default, 1 megablast", CArgDescriptions::eInteger, "0"); arg_desc->AddDefaultKey("matrix", "matrix", "Scoring matrix name", CArgDescriptions::eString, "BLOSUM62"); arg_desc->AddDefaultKey("mismatch", "penalty", "Penalty score for a mismatch", CArgDescriptions::eInteger, "0"); arg_desc->AddDefaultKey("match", "reward", "Reward score for a match", CArgDescriptions::eInteger, "0"); arg_desc->AddDefaultKey("word", "wordsize", "Word size", CArgDescriptions::eInteger, "0"); arg_desc->AddDefaultKey("templen", "templen", "Discontiguous word template length", CArgDescriptions::eInteger, "0"); arg_desc->SetConstraint("templen", &(*new CArgAllow_Strings, "0", "16", "18", "21")); arg_desc->AddDefaultKey("templtype", "templtype", "Discontiguous word template type", CArgDescriptions::eInteger, "0"); arg_desc->SetConstraint("templtype", new CArgAllow_Integers(0,2)); arg_desc->AddDefaultKey("thresh", "threshold", "Score threshold for neighboring words", CArgDescriptions::eInteger, "0"); arg_desc->AddDefaultKey("window","window", "Window size for two-hit extension", CArgDescriptions::eInteger, "0"); arg_desc->AddDefaultKey("scantype", "scantype", "Method for scanning the database: 0 traditional, 1 AG", CArgDescriptions::eInteger, "1"); arg_desc->SetConstraint("scantype", new CArgAllow_Integers(0,1)); arg_desc->AddDefaultKey("varword", "varword", "Should variable word size be used?", CArgDescriptions::eBoolean, "F"); arg_desc->AddDefaultKey("stride","stride", "Database scanning stride", CArgDescriptions::eInteger, "0"); arg_desc->AddDefaultKey("xungap", "xungapped", "X-dropoff value for ungapped extensions", CArgDescriptions::eDouble, "0"); arg_desc->AddDefaultKey("ungapped", "ungapped", "Perform only an ungapped alignment search?", CArgDescriptions::eBoolean, "F"); arg_desc->AddDefaultKey("greedy", "greedy", "Use greedy algorithm for gapped extensions: 0 no, 1 one-step, 2 two-step, 3 two-step with ungapped", CArgDescriptions::eInteger, "0"); arg_desc->AddDefaultKey("gopen", "gapopen", "Penalty for opening a gap", CArgDescriptions::eInteger, "0"); arg_desc->AddDefaultKey("gext", "gapext", "Penalty for extending a gap", CArgDescriptions::eInteger, "0"); arg_desc->AddDefaultKey("xgap", "xdrop", "X-dropoff value for preliminary gapped extensions", CArgDescriptions::eDouble, "0"); arg_desc->AddDefaultKey("xfinal", "xfinal", "X-dropoff value for final gapped extensions with traceback", CArgDescriptions::eDouble, "0"); arg_desc->AddDefaultKey("evalue", "evalue", "E-value threshold for saving hits", CArgDescriptions::eDouble, "0"); arg_desc->AddDefaultKey("searchsp", "searchsp", "Virtual search space to be used for statistical calculations", CArgDescriptions::eDouble, "0"); arg_desc->AddDefaultKey("perc", "percident", "Percentage of identities cutoff for saving hits", CArgDescriptions::eDouble, "0"); arg_desc->AddDefaultKey("hitlist", "hitlist_size", "How many best matching sequences to find?", CArgDescriptions::eInteger, "500"); arg_desc->AddDefaultKey("descr", "descriptions", "How many matching sequence descriptions to show?", CArgDescriptions::eInteger, "500"); arg_desc->AddDefaultKey("align", "alignments", "How many matching sequence alignments to show?", CArgDescriptions::eInteger, "250"); arg_desc->AddOptionalKey("out", "outfile", "File name for writing output", CArgDescriptions::eOutputFile); arg_desc->AddDefaultKey("format", "format", "How to format the results?", CArgDescriptions::eInteger, "0"); arg_desc->AddDefaultKey("html", "html", "Produce HTML output?", CArgDescriptions::eBoolean, "F"); arg_desc->AddDefaultKey("gencode", "gencode", "Query genetic code", CArgDescriptions::eInteger, "1"); arg_desc->AddDefaultKey("dbgencode", "dbgencode", "Database genetic code", CArgDescriptions::eInteger, "1"); arg_desc->AddDefaultKey("maxintron", "maxintron", "Longest allowed intron length for linking HSPs", CArgDescriptions::eInteger, "0"); arg_desc->AddDefaultKey("frameshift", "frameshift", "Frame shift penalty (blastx only)", CArgDescriptions::eInteger, "0"); arg_desc->AddOptionalKey("asnout", "seqalignasn", "File name for writing the seqalign results in ASN.1 form", CArgDescriptions::eOutputFile); arg_desc->AddDefaultKey("qstart", "query_start", "Starting offset in query location", CArgDescriptions::eInteger, "0"); arg_desc->AddDefaultKey("qend", "query_end", "Ending offset in query location", CArgDescriptions::eInteger, "0"); arg_desc->AddOptionalKey("pattern", "phipattern", "Pattern for PHI-BLAST", CArgDescriptions::eString); arg_desc->AddOptionalKey("pssm", "pssm", "File name for uploading a PSSM for PSI BLAST seach", CArgDescriptions::eInputFile); arg_desc->AddOptionalKey("dbrange", "databaserange", "Range of ordinal ids in the BLAST database.\n" "Format: \"oid1 oid2\"", CArgDescriptions::eString); SetupArgDescriptions(arg_desc.release());}void CBlastApplication::InitScope(void){ if (m_Scope.Empty()) { m_ObjMgr.Reset(new CObjectManager()); m_ObjMgr->RegisterDataLoader(*new CGBDataLoader("ID", 0, 2), CObjectManager::eDefault); m_Scope.Reset(new CScope(*m_ObjMgr)); m_Scope->AddDefaults(); _TRACE("Blast2seqApp: Initializing scope"); }}void CBlastApplication::RegisterBlastDbLoader(char *dbname, bool db_is_na){ m_ObjMgr->RegisterDataLoader(*new CBlastDbDataLoader("BLASTDB", dbname, db_is_na? (CBlastDbDataLoader::eNucleotide) : (CBlastDbDataLoader::eProtein)), CObjectManager::eDefault);}voidCBlastApplication::ProcessCommandLineArgs(CBlastOptionsHandle* opts_handle, BlastSeqSrc* bssp, RPSInfo *rps_info){ CArgs args = GetArgs(); CBlastOptions& opt = opts_handle->SetOptions(); EProgram program_number = opt.GetProgram(); if (args["strand"].AsInteger()) { switch (args["strand"].AsInteger()) { case 1: opt.SetStrandOption(eNa_strand_plus); break; case 2: opt.SetStrandOption(eNa_strand_minus); break; case 3: opt.SetStrandOption(eNa_strand_both); break; default: abort(); } } opt.SetFilterString(args["filter"].AsString().c_str()); // FIXME: Handle lcase masking // If lookup table type argument value is 0, the type will be set correctly // automatically. Value 1 corresponds to megablast lookup table; switch (args["lookup"].AsInteger()) { case 1: opt.SetLookupTableType(MB_LOOKUP_TABLE); break; default: break; } if (program_number == eRPSBlast || program_number == eRPSTblastn) { ASSERT(rps_info != NULL); opt.SetGapOpeningCost(rps_info->aux_info.gap_open_penalty); opt.SetGapExtensionCost(rps_info->aux_info.gap_extend_penalty); } else { if (args["matrix"]) { opt.SetMatrixName(args["matrix"].AsString().c_str()); } if (args["gopen"].AsInteger() || args["greedy"].AsInteger()) { opt.SetGapOpeningCost(args["gopen"].AsInteger()); } if (args["gext"].AsInteger() || args["greedy"].AsInteger()) { opt.SetGapExtensionCost(args["gext"].AsInteger()); } } if (args["mismatch"].AsInteger()) { opt.SetMismatchPenalty(args["mismatch"].AsInteger()); } if (args["match"].AsInteger()) { opt.SetMatchReward(args["match"].AsInteger()); } if (args["word"].AsInteger()) { opt.SetWordSize(args["word"].AsInteger()); } if (args["templen"].AsInteger()) { opt.SetMBTemplateLength(args["templen"].AsInteger()); } if (args["templtype"].AsInteger()) { opt.SetMBTemplateType(args["templtype"].AsInteger()); } if (args["thresh"].AsInteger()) { opt.SetWordThreshold(args["thresh"].AsInteger()); } if (args["window"].AsInteger()) { opt.SetWindowSize(args["window"].AsInteger()); } // The next 3 apply to nucleotide searches only string program = args["program"].AsString(); if (program == "blastn") { // Setting seed extension method involves changing the scanning // stride as well, which is handled in the derived // CBlastNucleotideOptionsHandle class, but not in the base // CBlastOptionsHandle class. CBlastNucleotideOptionsHandle* nucl_handle = dynamic_cast<CBlastNucleotideOptionsHandle*>(opts_handle); if (!args["templen"].AsInteger()) { opt.SetVariableWordSize(args["varword"].AsBoolean()); switch(args["scantype"].AsInteger()) { case 1: nucl_handle->SetSeedExtensionMethod(eRightAndLeft); break; default: nucl_handle->SetSeedExtensionMethod(eRight); break; } } else { // Discontiguous Mega BLAST: only one extension method. nucl_handle->SetSeedExtensionMethod(eRight); } // Override the scan step value if it is set by user if (args["stride"].AsInteger()) { opt.SetScanStep(args["stride"].AsInteger()); } } if (args["xungap"].AsDouble()) { opt.SetXDropoff(args["xungap"].AsDouble()); } if (args["ungapped"].AsBoolean()) { opt.SetGappedMode(false); } switch (args["greedy"].AsInteger()) { case 1: /* Immediate greedy gapped extension with traceback */ opt.SetGapExtnAlgorithm(eGreedyWithTracebackExt); opt.SetUngappedExtension(false); break; case 2: /* Two-step greedy extension, no ungapped extension */ opt.SetGapExtnAlgorithm(eGreedyExt); opt.SetUngappedExtension(false); break; case 3: /* Two-step greedy extension after ungapped extension*/ opt.SetGapExtnAlgorithm(eGreedyExt); opt.SetUngappedExtension(true); break; default: break; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -