⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 id1_fetch.cpp

📁 ncbi源码
💻 CPP
📖 第 1 页 / 共 3 页
字号:
/*
 * ===========================================================================
 * PRODUCTION $Log: id1_fetch.cpp,v $
 * PRODUCTION Revision 1000.2  2004/06/01 18:30:30  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.50
 * PRODUCTION
 * ===========================================================================
 */

/*  $Id: id1_fetch.cpp,v 1000.2 2004/06/01 18:30:30 gouriano Exp $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Author:  Denis Vakatov, Aleksey Grichenko, Aaron Ucko
 *
 * File Description:
 *   New IDFETCH network client (get data from "ID1")
 */

#include <ncbi_pch.hpp>
#include <corelib/ncbiapp.hpp>
#include <corelib/ncbienv.hpp>
#include <corelib/ncbiargs.hpp>
#include <corelib/ncbireg.hpp>

#include <connect/ncbi_core_cxx.hpp>

#include <serial/enumvalues.hpp>
#include <serial/objostrasnb.hpp>
#include <serial/iterator.hpp>

#include <objects/entrez2/Entrez2_boolean_element.hpp>
#include <objects/entrez2/Entrez2_boolean_exp.hpp>
#include <objects/entrez2/Entrez2_boolean_reply.hpp>
#include <objects/entrez2/Entrez2_db_id.hpp>
#include <objects/entrez2/Entrez2_docsum.hpp>
#include <objects/entrez2/Entrez2_docsum_data.hpp>
#include <objects/entrez2/Entrez2_docsum_list.hpp>
#include <objects/entrez2/Entrez2_eval_boolean.hpp>
#include <objects/entrez2/Entrez2_id_list.hpp>
#include <objects/entrez2/entrez2_client.hpp>
#include <objects/general/Date.hpp>
#include <objects/general/Date_std.hpp>
#include <objects/general/Dbtag.hpp>
#include <objects/general/Object_id.hpp>
#include <objects/id1/ID1Seq_hist.hpp>
#include <objects/id1/ID1server_maxcomplex.hpp>
#include <objects/id1/id1_client.hpp>
#include <objmgr/graph_ci.hpp>
#include <objmgr/object_manager.hpp>
#include <objmgr/scope.hpp>
#include <objmgr/seq_vector.hpp>
#include <objtools/data_loaders/genbank/gbloader.hpp>
#include <objects/seq/Bioseq.hpp>
#include <objects/seq/Seq_descr.hpp>
#include <objects/seq/Seq_hist_rec.hpp>
#include <objects/seq/Seqdesc.hpp>
#include <objects/seqloc/Seq_id.hpp>
#include <objects/seqloc/Textseq_id.hpp>
#include <objects/seqres/Byte_graph.hpp>
#include <objects/seqres/Seq_graph.hpp>
#include <objects/seqset/Seq_entry.hpp>
#include <objects/seqset/Bioseq_set.hpp>

#if 1
#include <objtools/flat/flat_ncbi_formatter.hpp>
#else
#include <objmgr/util/genbank.hpp>
#endif
#include <objmgr/util/sequence.hpp>

#include <memory>
#include <algorithm>


BEGIN_NCBI_SCOPE
USING_SCOPE(NCBI_NS_NCBI::objects); // MSVC requires qualification (!)


/////////////////////////////////
//  CId1FetchApp::
//
//  Command-line application class for the IDFETCH network client
//  (see the file description above).  Init() registers the command-line
//  argument descriptions; Run() validates the argument combination and
//  sets up the clients, output stream, object manager and scope.
//

class CId1FetchApp : public CNcbiApplication
{
    // CNcbiApplication overrides
    virtual void Init(void);
    virtual int  Run (void);
    virtual void Exit(void);

private:
    // Lookup and output helpers; their definitions are outside this part
    // of the file.
    bool LookUpGI(int gi);
    int  LookUpFastaSeqID(const string& s);
    int  LookUpFlatSeqID(const string& s);

    void WriteFastaIDs     (const list< CRef< CSeq_id > >& ids);
    void WriteHistoryTable (const CID1server_back& id1_reply);
    void WriteQualityScores(CBioseq_Handle& handle);

    CNcbiOstream*        m_OutputFile; // set in Run() from the "out" argument
    CID1Client           m_ID1Client;
    CEntrez2Client       m_E2Client;   // Run() sets its tool name to "id1_fetch"
    CRef<CObjectManager> m_ObjMgr;     // created in Run()
    CRef<CScope>         m_Scope;      // created in Run() on top of m_ObjMgr
};


// Describe the command-line interface.
//
// Registers the keys "gi", "fmt", "out", "log", "db", "ent", "lt", "in",
// "maxplex", "flat", "fasta", "query", "qf" and "repeat", together with
// their value constraints and the usage text, and then hands the
// descriptions over to the application via SetupArgDescriptions().
void CId1FetchApp::Init(void)
{
    // Prepare command line descriptions
    //

    // Create
    auto_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);

    // GI
    arg_desc->AddOptionalKey
        ("gi", "SeqEntryID",
         "GI id of the Seq-Entry to fetch",
         CArgDescriptions::eInteger);
    arg_desc->SetConstraint
        ("gi", new CArgAllow_Integers(0, 99999999));

    // Output format
    arg_desc->AddDefaultKey
        ("fmt", "OutputFormat",
         "Format to dump the resulting data in",
         CArgDescriptions::eString, "asn");
    arg_desc->SetConstraint
        ("fmt", &(*new CArgAllow_Strings,
                  "asn", "asnb", "xml", "genbank", "genpept", "fasta",
                  "quality", "docsum"));

    // Output datafile
    arg_desc->AddDefaultKey
        ("out", "ResultFile",
         "File to dump the resulting data to",
         CArgDescriptions::eOutputFile, "-", CArgDescriptions::fBinary);

    // Log file
    arg_desc->AddOptionalKey
        ("log", "LogFile",
         "File to post errors and messages to",
         CArgDescriptions::eOutputFile,
         0);

    // Database to use
    arg_desc->AddOptionalKey
        ("db", "Database", // was "-d" in `idfetch'
         "Database to use",
         CArgDescriptions::eString);

    // Entity number
    arg_desc->AddOptionalKey
        ("ent", "EntityNumber", // was "-e" in `idfetch'
         "(Sub)entity number (retrieval number) to dump",
         CArgDescriptions::eInteger);
    arg_desc->SetConstraint
        ("ent", new CArgAllow_Integers(0, 99999999));

    // Type of lookup
    arg_desc->AddDefaultKey
        ("lt", "LookupType", // combination of "-i" (!) and "-n" in `idfetch'
         "Type of lookup",
         CArgDescriptions::eString, "entry");
    arg_desc->SetConstraint
        ("lt", &(*new CArgAllow_Strings,
                 "entry", "state", "ids", "history", "revisions", "none"));

    // File with list of stuff to dump
    arg_desc->AddOptionalKey
        ("in", "RequestFile", // was "-G" (!) in `idfetch'
         "File with list of GIs, (versioned) accessions, FASTA SeqIDs to dump",
         CArgDescriptions::eInputFile, CArgDescriptions::fPreOpen);

    // Maximum complexity
    arg_desc->AddDefaultKey
        ("maxplex", "MaxComplexity", // was "-c" in `idfetch'
         "Maximum complexity to return",
         CArgDescriptions::eString, "entry");
    arg_desc->SetConstraint
        ("maxplex", &(*new CArgAllow_Strings,
                      "entry", "bioseq", "bioseq-set", "nuc-prot",
                      "pub-set"));

    // Flattened SeqID
    arg_desc->AddOptionalKey
        ("flat", "FlatID", // was "-f" in `idfetch'
         "Flattened SeqID; format can be\n"
         "\t'type([name][,[accession][,[release][,version]]])'"
         " [e.g., '5(HUMHBB)'],\n"
         "\ttype=accession[.version], or type:number",
         CArgDescriptions::eString);

    // FASTA-style SeqID
    arg_desc->AddOptionalKey
        ("fasta", "FastaID", // was "-s" in `idfetch'
         "FASTA-style SeqID, in the form \"type|data\"; choices are\n"
         "\tlcl|int lcl|str bbs|int bbm|int gim|int gb|acc|loc emb|acc|loc\n"
         "\tpir|acc|name sp|acc|name pat|country|patent|seq ref|acc|name|rel\n"
         "\tgnl|db|id gi|int dbj|acc|loc prf|acc|name pdb|entry|chain\n"
         "\ttpg|acc|name tpe|acc|name tpd|acc|name",
         CArgDescriptions::eString);

    // Generate GI list by Entrez query
    arg_desc->AddOptionalKey
        ("query", "EntrezQueryString", // was "-q" in `idfetch'
         "Generate GI list by Entrez query given on command line",
         CArgDescriptions::eString);
    arg_desc->AddOptionalKey
        ("qf", "EntrezQueryFile", // was "-Q" in `idfetch'
         "Generate GI list by Entrez query in given file",
         CArgDescriptions::eInputFile, CArgDescriptions::fPreOpen);

    // Program description
    string prog_description =
        "Fetch SeqEntry from ID server by its GI ID, possibly obtained from\n"
        "its SeqID or an Entrez query";
    arg_desc->SetUsageContext(GetArguments().GetProgramBasename(),
                              prog_description, false);

    // Number of times to repeat the fetch (defaults to 1)
    arg_desc->AddDefaultKey
        ("repeat", "repeat",
         "Repeat fetch number of times",
         CArgDescriptions::eInteger, "1");

    // Pass argument descriptions to the application
    //
    SetupArgDescriptions(arg_desc.release());
}


// Workaround for "replace_if"
//
// Plain-bool predicate around iscntrl().  The argument goes through
// unsigned char first: handing iscntrl() a negative value other than EOF
// is undefined behavior, and plain char may be signed, so bytes >= 0x80
// would otherwise be passed as negative ints.
inline bool s_IsControl(char c)
{
    return iscntrl(static_cast<unsigned char>(c)) != 0;
}


// Application entry point.
//
// The visible part validates the argument combination (returning -1 or
// throwing CArgException on an invalid one) and then sets up the CONNECT
// library, the output stream, the object manager and the scope.
// NOTE(review): the function body continues past this part of the file.
int CId1FetchApp::Run(void)
{
    // Process command line args
    const CArgs& args = GetArgs();

    // Setup and tune logging facilities
    if ( args["log"] ) {
        SetDiagStream( &args["log"].AsOutputFile() );
    }
#ifdef _DEBUG
    // SetDiagTrace(eDT_Enable);
    SetDiagPostLevel(eDiag_Info);
    SetDiagPostFlag(eDPF_All);
#endif

    // Make sure the combination of arguments is valid
    {{
        int id_count = 0;
        const string& fmt = args["fmt"].AsString();

        if (args["gi"])     id_count++;
        if (args["in"])     id_count++;
        if (args["flat"])   id_count++;
        if (args["fasta"])  id_count++;
        if (args["query"])  id_count++;
        if (args["qf"])     id_count++;

        if (id_count != 1) {
            NCBI_THROW(CArgException,eNoArg,
                "You must supply exactly one argument"
                " indicating what to look up.");
        }
        if ((args["query"]  ||  args["qf"]  ||  fmt == "docsum")
            &&  !args["db"]) {
            ERR_POST("No Entrez database supplied.  Try -db Nucleotide or "
                     "-db Protein.");
            return -1;
        }
        if ((fmt == "genbank"  ||  fmt == "genpept"  ||  fmt == "quality")
            &&  args["lt"].AsString() != "entry") {
            ERR_POST("The output format '" << fmt
                     << "' is only available for Seq-Entries.");
            return -1;
        }
    }}

    // Setup application registry, error log, and MT-lock for CONNECT library
    CONNECT_Init(&GetConfig());

    m_E2Client.SetDefaultRequest().SetTool("id1_fetch");

    // Open output file
    m_OutputFile = &args["out"].AsOutputFile();

    // Set up object manager
    m_ObjMgr = new CObjectManager;
    m_Scope = new CScope(*m_ObjMgr);
    m_ObjMgr->RegisterDataLoader( *new CGBDataLoader("GENBANK"));
    m_Scope->AddDataLoader("GENBANK");

    int repeat = args["repeat"].AsInteger();

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -