⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 blast_app.cpp

📁 ncbi源码
💻 CPP
📖 第 1 页 / 共 2 页
字号:
/* * =========================================================================== * PRODUCTION $Log: blast_app.cpp,v $ * PRODUCTION Revision 1000.8  2004/06/01 18:06:35  gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.49 * PRODUCTION * =========================================================================== */static char const rcsid[] = "$Id: blast_app.cpp,v 1000.8 2004/06/01 18:06:35 gouriano Exp $";/** ===========================================================================**                            PUBLIC DOMAIN NOTICE*               National Center for Biotechnology Information**  This software/database is a "United States Government Work" under the*  terms of the United States Copyright Act.  It was written as part of*  the author's offical duties as a United States Government employee and*  thus cannot be copyrighted.  This software/database is freely available*  to the public for use. The National Library of Medicine and the U.S.*  Government have not placed any restriction on its use or reproduction.**  Although all reasonable efforts have been taken to ensure the accuracy*  and reliability of the software and data, the NLM and the U.S.*  Government do not and cannot warrant the performance or results that*  may be obtained by using this software or data. The NLM and the U.S.*  Government disclaim all warranties, express or implied, including*  warranties of performance, merchantability or fitness for any particular*  purpose.**  Please cite the author in any work or product based on this material.** ===========================================================================*//*****************************************************************************File name: blast_app.cppAuthor: Ilya DondoshanskyContents: C++ driver for running BLAST******************************************************************************/#include <ncbi_pch.hpp>#include <corelib/ncbiapp.hpp>#include <corelib/ncbifile.hpp>#include <objects/seq/seq__.hpp>#include <objects/seq/seqport_util.hpp>#include <objects/seqfeat/Genetic_code_table.hpp>#include <objmgr/object_manager.hpp>#include <objtools/data_loaders/blastdb/bdbloader.hpp>#include <objtools/data_loaders/genbank/gbloader.hpp>#include <objmgr/util/sequence.hpp>#include <algo/blast/api/blast_options.hpp>#include <algo/blast/api/blast_nucl_options.hpp>#include <algo/blast/api/db_blast.hpp>#include <algo/blast/api/blast_aux.hpp>#include "blast_input.hpp"#include <algo/blast/api/seqdb_src.hpp>#include <objtools/alnmgr/util/blast_format.hpp>#ifndef NCBI_C_TOOLKIT#define NCBI_C_TOOLKIT#endif// C include files#include <algo/blast/core/blast_setup.h>#include <algo/blast/core/blast_util.h>#include <algo/blast/core/lookup_wrap.h>#include <algo/blast/core/blast_engine.h>// For writing out seqalign only#include <ctools/asn_converter.hpp>#include <objalign.h>USING_NCBI_SCOPE;USING_SCOPE(blast);USING_SCOPE(objects);class CBlastApplication : public CNcbiApplication{private:    virtual void Init(void);    virtual int Run(void);    virtual void Exit(void);    void InitScope(void);    void InitOptions(void);    void SetOptions(const CArgs& args);    void ProcessCommandLineArgs(CBlastOptionsHandle* opt,                                 BlastSeqSrc* bssp, RPSInfo *rps_info);    int BlastSearch(void);    void RegisterBlastDbLoader(char* dbname, bool is_na);    void FormatResults(CDbBlast& blaster, TSeqAlignVector& seqalignv);    CRef<CObjectManager> m_ObjMgr;    CRef<CScope>         m_Scope;};void CBlastApplication::Init(void){    HideStdArgs(fHideLogfile | fHideConffile | fHideVersion);    auto_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);    arg_desc->SetUsageContext(GetArguments().GetProgramBasename(), "basic local alignment search tool");    arg_desc->AddKey("program", "program", "Type of BLAST program",        CArgDescriptions::eString);    arg_desc->SetConstraint        ("program", &(*new CArgAllow_Strings,                 "blastp", "blastn", "blastx", "tblastn", "tblastx",                 "rpsblast", "rpstblastn"));    arg_desc->AddDefaultKey("query", "query", "Query file name",                     CArgDescriptions::eInputFile, "stdin");    arg_desc->AddKey("db", "database", "BLAST database name",                     CArgDescriptions::eString);    arg_desc->AddDefaultKey("strand", "strand",         "Query strands to search: 1 forward, 2 reverse, 0,3 both",        CArgDescriptions::eInteger, "0");    arg_desc->SetConstraint("strand", new CArgAllow_Integers(0,3));    arg_desc->AddDefaultKey("filter", "filter", "Filtering option",                            CArgDescriptions::eString, "T");    arg_desc->AddDefaultKey("lcase", "lcase", "Should lower case be masked?",                            CArgDescriptions::eBoolean, "F");    arg_desc->AddDefaultKey("lookup", "lookup",         "Type of lookup table: 0 default, 1 megablast",        CArgDescriptions::eInteger, "0");    arg_desc->AddDefaultKey("matrix", "matrix", "Scoring matrix name",                            CArgDescriptions::eString, "BLOSUM62");    arg_desc->AddDefaultKey("mismatch", "penalty", "Penalty score for a mismatch",                            CArgDescriptions::eInteger, "0");    arg_desc->AddDefaultKey("match", "reward", "Reward score for a match",                            CArgDescriptions::eInteger, "0");    arg_desc->AddDefaultKey("word", "wordsize", "Word size",                            CArgDescriptions::eInteger, "0");    arg_desc->AddDefaultKey("templen", "templen",         "Discontiguous word template length",        CArgDescriptions::eInteger, "0");    arg_desc->SetConstraint("templen",                             &(*new CArgAllow_Strings, "0", "16", "18", "21"));    arg_desc->AddDefaultKey("templtype", "templtype",         "Discontiguous word template type",        CArgDescriptions::eInteger, "0");    arg_desc->SetConstraint("templtype", new CArgAllow_Integers(0,2));    arg_desc->AddDefaultKey("thresh", "threshold",         "Score threshold for neighboring words",        CArgDescriptions::eInteger, "0");    arg_desc->AddDefaultKey("window","window", "Window size for two-hit extension",                            CArgDescriptions::eInteger, "0");    arg_desc->AddDefaultKey("scantype", "scantype",         "Method for scanning the database: 0 traditional, 1 AG",        CArgDescriptions::eInteger, "1");    arg_desc->SetConstraint("scantype", new CArgAllow_Integers(0,1));    arg_desc->AddDefaultKey("varword", "varword",         "Should variable word size be used?",        CArgDescriptions::eBoolean, "F");    arg_desc->AddDefaultKey("stride","stride", "Database scanning stride",                            CArgDescriptions::eInteger, "0");    arg_desc->AddDefaultKey("xungap", "xungapped",         "X-dropoff value for ungapped extensions",        CArgDescriptions::eDouble, "0");    arg_desc->AddDefaultKey("ungapped", "ungapped",         "Perform only an ungapped alignment search?",        CArgDescriptions::eBoolean, "F");    arg_desc->AddDefaultKey("greedy", "greedy",         "Use greedy algorithm for gapped extensions: 0 no, 1 one-step, 2 two-step, 3 two-step with ungapped",        CArgDescriptions::eInteger, "0");    arg_desc->AddDefaultKey("gopen", "gapopen", "Penalty for opening a gap",                            CArgDescriptions::eInteger, "0");    arg_desc->AddDefaultKey("gext", "gapext", "Penalty for extending a gap",                            CArgDescriptions::eInteger, "0");    arg_desc->AddDefaultKey("xgap", "xdrop",         "X-dropoff value for preliminary gapped extensions",        CArgDescriptions::eDouble, "0");    arg_desc->AddDefaultKey("xfinal", "xfinal",         "X-dropoff value for final gapped extensions with traceback",        CArgDescriptions::eDouble, "0");    arg_desc->AddDefaultKey("evalue", "evalue",         "E-value threshold for saving hits",        CArgDescriptions::eDouble, "0");    arg_desc->AddDefaultKey("searchsp", "searchsp",         "Virtual search space to be used for statistical calculations",        CArgDescriptions::eDouble, "0");    arg_desc->AddDefaultKey("perc", "percident",         "Percentage of identities cutoff for saving hits",        CArgDescriptions::eDouble, "0");    arg_desc->AddDefaultKey("hitlist", "hitlist_size",        "How many best matching sequences to find?",        CArgDescriptions::eInteger, "500");    arg_desc->AddDefaultKey("descr", "descriptions",        "How many matching sequence descriptions to show?",        CArgDescriptions::eInteger, "500");    arg_desc->AddDefaultKey("align", "alignments",         "How many matching sequence alignments to show?",        CArgDescriptions::eInteger, "250");    arg_desc->AddOptionalKey("out", "outfile",         "File name for writing output",        CArgDescriptions::eOutputFile);    arg_desc->AddDefaultKey("format", "format",         "How to format the results?",        CArgDescriptions::eInteger, "0");    arg_desc->AddDefaultKey("html", "html", "Produce HTML output?",                            CArgDescriptions::eBoolean, "F");    arg_desc->AddDefaultKey("gencode", "gencode", "Query genetic code",                            CArgDescriptions::eInteger, "1");    arg_desc->AddDefaultKey("dbgencode", "dbgencode", "Database genetic code",                            CArgDescriptions::eInteger, "1");    arg_desc->AddDefaultKey("maxintron", "maxintron",                             "Longest allowed intron length for linking HSPs",                            CArgDescriptions::eInteger, "0");    arg_desc->AddDefaultKey("frameshift", "frameshift",                            "Frame shift penalty (blastx only)",                            CArgDescriptions::eInteger, "0");    arg_desc->AddOptionalKey("asnout", "seqalignasn",         "File name for writing the seqalign results in ASN.1 form",        CArgDescriptions::eOutputFile);    arg_desc->AddDefaultKey("qstart", "query_start",                            "Starting offset in query location",                            CArgDescriptions::eInteger, "0");    arg_desc->AddDefaultKey("qend", "query_end",                            "Ending offset in query location",                            CArgDescriptions::eInteger, "0");    arg_desc->AddOptionalKey("pattern", "phipattern",                            "Pattern for PHI-BLAST",                             CArgDescriptions::eString);    arg_desc->AddOptionalKey("pssm", "pssm",         "File name for uploading a PSSM for PSI BLAST seach",        CArgDescriptions::eInputFile);    arg_desc->AddOptionalKey("dbrange", "databaserange",                            "Range of ordinal ids in the BLAST database.\n"                             "Format: \"oid1 oid2\"",                             CArgDescriptions::eString);    SetupArgDescriptions(arg_desc.release());}void CBlastApplication::InitScope(void){    if (m_Scope.Empty()) {        m_ObjMgr.Reset(new CObjectManager());        m_ObjMgr->RegisterDataLoader(*new CGBDataLoader("ID", 0, 2),                CObjectManager::eDefault);        m_Scope.Reset(new CScope(*m_ObjMgr));        m_Scope->AddDefaults();        _TRACE("Blast2seqApp: Initializing scope");    }}void CBlastApplication::RegisterBlastDbLoader(char *dbname, bool db_is_na){    m_ObjMgr->RegisterDataLoader(*new CBlastDbDataLoader("BLASTDB", dbname,                   db_is_na? (CBlastDbDataLoader::eNucleotide) :                             (CBlastDbDataLoader::eProtein)),                    CObjectManager::eDefault);}voidCBlastApplication::ProcessCommandLineArgs(CBlastOptionsHandle* opts_handle,                                           BlastSeqSrc* bssp, RPSInfo *rps_info){    CArgs args = GetArgs();    CBlastOptions& opt = opts_handle->SetOptions();    EProgram program_number = opt.GetProgram();    if (args["strand"].AsInteger()) {        switch (args["strand"].AsInteger()) {        case 1: opt.SetStrandOption(eNa_strand_plus); break;        case 2: opt.SetStrandOption(eNa_strand_minus); break;        case 3: opt.SetStrandOption(eNa_strand_both); break;        default: abort();        }    }    opt.SetFilterString(args["filter"].AsString().c_str());    // FIXME: Handle lcase masking    // If lookup table type argument value is 0, the type will be set correctly    // automatically. Value 1 corresponds to megablast lookup table;    switch (args["lookup"].AsInteger()) {    case 1:        opt.SetLookupTableType(MB_LOOKUP_TABLE);        break;    default:        break;    }    if (program_number == eRPSBlast ||        program_number == eRPSTblastn) {        ASSERT(rps_info != NULL);        opt.SetGapOpeningCost(rps_info->aux_info.gap_open_penalty);        opt.SetGapExtensionCost(rps_info->aux_info.gap_extend_penalty);    }    else {        if (args["matrix"]) {            opt.SetMatrixName(args["matrix"].AsString().c_str());        }        if (args["gopen"].AsInteger() || args["greedy"].AsInteger()) {            opt.SetGapOpeningCost(args["gopen"].AsInteger());        }        if (args["gext"].AsInteger() || args["greedy"].AsInteger()) {            opt.SetGapExtensionCost(args["gext"].AsInteger());        }    }    if (args["mismatch"].AsInteger()) {        opt.SetMismatchPenalty(args["mismatch"].AsInteger());    }    if (args["match"].AsInteger()) {        opt.SetMatchReward(args["match"].AsInteger());    }    if (args["word"].AsInteger()) {        opt.SetWordSize(args["word"].AsInteger());    }    if (args["templen"].AsInteger()) {        opt.SetMBTemplateLength(args["templen"].AsInteger());    }    if (args["templtype"].AsInteger()) {        opt.SetMBTemplateType(args["templtype"].AsInteger());    }    if (args["thresh"].AsInteger()) {        opt.SetWordThreshold(args["thresh"].AsInteger());    }    if (args["window"].AsInteger()) {        opt.SetWindowSize(args["window"].AsInteger());    }    // The next 3 apply to nucleotide searches only    string program = args["program"].AsString();    if (program == "blastn") {        // Setting seed extension method involves changing the scanning         // stride as well, which is handled in the derived         // CBlastNucleotideOptionsHandle class, but not in the base        // CBlastOptionsHandle class.        CBlastNucleotideOptionsHandle* nucl_handle =             dynamic_cast<CBlastNucleotideOptionsHandle*>(opts_handle);        if (!args["templen"].AsInteger()) {            opt.SetVariableWordSize(args["varword"].AsBoolean());            switch(args["scantype"].AsInteger()) {            case 1:                nucl_handle->SetSeedExtensionMethod(eRightAndLeft);                break;            default:                nucl_handle->SetSeedExtensionMethod(eRight);                break;            }        } else {            // Discontiguous Mega BLAST: only one extension method.            nucl_handle->SetSeedExtensionMethod(eRight);         }        // Override the scan step value if it is set by user        if (args["stride"].AsInteger()) {            opt.SetScanStep(args["stride"].AsInteger());        }    }    if (args["xungap"].AsDouble()) {        opt.SetXDropoff(args["xungap"].AsDouble());    }    if (args["ungapped"].AsBoolean()) {        opt.SetGappedMode(false);    }    switch (args["greedy"].AsInteger()) {    case 1: /* Immediate greedy gapped extension with traceback */        opt.SetGapExtnAlgorithm(eGreedyWithTracebackExt);        opt.SetUngappedExtension(false);        break;    case 2: /* Two-step greedy extension, no ungapped extension */        opt.SetGapExtnAlgorithm(eGreedyExt);        opt.SetUngappedExtension(false);        break;    case 3: /* Two-step greedy extension after ungapped extension*/        opt.SetGapExtnAlgorithm(eGreedyExt);        opt.SetUngappedExtension(true);        break;    default: break;    }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -