📄 id1_fetch.cpp
字号:
/*
 * ===========================================================================
 * PRODUCTION $Log: id1_fetch.cpp,v $
 * PRODUCTION Revision 1000.2 2004/06/01 18:30:30 gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.50
 * PRODUCTION
 * ===========================================================================
 */

/* $Id: id1_fetch.cpp,v 1000.2 2004/06/01 18:30:30 gouriano Exp $
 * ===========================================================================
 *
 * PUBLIC DOMAIN NOTICE
 * National Center for Biotechnology Information
 *
 * This software/database is a "United States Government Work" under the
 * terms of the United States Copyright Act. It was written as part of
 * the author's official duties as a United States Government employee and
 * thus cannot be copyrighted. This software/database is freely available
 * to the public for use. The National Library of Medicine and the U.S.
 * Government have not placed any restriction on its use or reproduction.
 *
 * Although all reasonable efforts have been taken to ensure the accuracy
 * and reliability of the software and data, the NLM and the U.S.
 * Government do not and cannot warrant the performance or results that
 * may be obtained by using this software or data. The NLM and the U.S.
 * Government disclaim all warranties, express or implied, including
 * warranties of performance, merchantability or fitness for any particular
 * purpose.
 *
 * Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Author: Denis Vakatov, Aleksey Grichenko, Aaron Ucko
 *
 * File Description:
 * New IDFETCH network client (get data from "ID1")
 */

#include <ncbi_pch.hpp>
#include <corelib/ncbiapp.hpp>
#include <corelib/ncbienv.hpp>
#include <corelib/ncbiargs.hpp>
#include <corelib/ncbireg.hpp>

#include <connect/ncbi_core_cxx.hpp>

#include <serial/enumvalues.hpp>
#include <serial/objostrasnb.hpp>
#include <serial/iterator.hpp>

#include <objects/entrez2/Entrez2_boolean_element.hpp>
#include <objects/entrez2/Entrez2_boolean_exp.hpp>
#include <objects/entrez2/Entrez2_boolean_reply.hpp>
#include <objects/entrez2/Entrez2_db_id.hpp>
#include <objects/entrez2/Entrez2_docsum.hpp>
#include <objects/entrez2/Entrez2_docsum_data.hpp>
#include <objects/entrez2/Entrez2_docsum_list.hpp>
#include <objects/entrez2/Entrez2_eval_boolean.hpp>
#include <objects/entrez2/Entrez2_id_list.hpp>
#include <objects/entrez2/entrez2_client.hpp>
#include <objects/general/Date.hpp>
#include <objects/general/Date_std.hpp>
#include <objects/general/Dbtag.hpp>
#include <objects/general/Object_id.hpp>
#include <objects/id1/ID1Seq_hist.hpp>
#include <objects/id1/ID1server_maxcomplex.hpp>
#include <objects/id1/id1_client.hpp>
#include <objmgr/graph_ci.hpp>
#include <objmgr/object_manager.hpp>
#include <objmgr/scope.hpp>
#include <objmgr/seq_vector.hpp>
#include <objtools/data_loaders/genbank/gbloader.hpp>
#include <objects/seq/Bioseq.hpp>
#include <objects/seq/Seq_descr.hpp>
#include <objects/seq/Seq_hist_rec.hpp>
#include <objects/seq/Seqdesc.hpp>
#include <objects/seqloc/Seq_id.hpp>
#include <objects/seqloc/Textseq_id.hpp>
#include <objects/seqres/Byte_graph.hpp>
#include <objects/seqres/Seq_graph.hpp>
#include <objects/seqset/Seq_entry.hpp>
#include <objects/seqset/Bioseq_set.hpp>

#if 1
#include <objtools/flat/flat_ncbi_formatter.hpp>
#else
#include <objmgr/util/genbank.hpp>
#endif
#include <objmgr/util/sequence.hpp>

#include <memory>
#include <algorithm>


BEGIN_NCBI_SCOPE
USING_SCOPE(NCBI_NS_NCBI::objects); // MSVC requires qualification (!)


/////////////////////////////////
// CId1FetchApp::
//
// Command-line application class for the ID1 fetch client.  It overrides
// the CNcbiApplication Init/Run/Exit hooks and keeps the output stream,
// the ID1 and Entrez2 clients, and the object manager / scope as members.
//

class CId1FetchApp : public CNcbiApplication
{
    // Application hooks (declared before any access specifier, so they are
    // private in this class).
    virtual void Init(void);
    virtual int  Run (void);
    virtual void Exit(void);

private:
    // Lookup helpers, keyed by GI, FASTA-style SeqID or flattened SeqID.
    bool LookUpGI(int gi);
    int  LookUpFastaSeqID(const string& s);
    int  LookUpFlatSeqID(const string& s);

    // Output helpers.
    void WriteFastaIDs     (const list< CRef< CSeq_id > >& ids);
    void WriteHistoryTable (const CID1server_back& id1_reply);
    void WriteQualityScores(CBioseq_Handle& handle);

    CNcbiOstream*        m_OutputFile;  // set in Run() from the "out" argument
    CID1Client           m_ID1Client;
    CEntrez2Client       m_E2Client;
    CRef<CObjectManager> m_ObjMgr;      // created in Run()
    CRef<CScope>         m_Scope;       // created in Run() on top of m_ObjMgr
};


// Describe every command-line argument the application accepts (keys,
// types, defaults and value constraints) and hand the description over to
// the application framework via SetupArgDescriptions().
void CId1FetchApp::Init(void)
{
    // Prepare command line descriptions
    //

    // Create
    auto_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);

    // GI
    arg_desc->AddOptionalKey
        ("gi", "SeqEntryID", "GI id of the Seq-Entry to fetch",
         CArgDescriptions::eInteger);
    // NOTE(review): the upper bound rejects any GI above 99999999 --
    // confirm that this limit is still intended.
    arg_desc->SetConstraint
        ("gi", new CArgAllow_Integers(0, 99999999));

    // Output format
    arg_desc->AddDefaultKey
        ("fmt", "OutputFormat", "Format to dump the resulting data in",
         CArgDescriptions::eString, "asn");
    arg_desc->SetConstraint
        ("fmt", &(*new CArgAllow_Strings,
                  "asn", "asnb", "xml", "genbank", "genpept", "fasta",
                  "quality", "docsum"));

    // Output datafile
    arg_desc->AddDefaultKey
        ("out", "ResultFile", "File to dump the resulting data to",
         CArgDescriptions::eOutputFile, "-", CArgDescriptions::fBinary);

    // Log file
    arg_desc->AddOptionalKey
        ("log", "LogFile", "File to post errors and messages to",
         CArgDescriptions::eOutputFile, 0);

    // Database to use
    arg_desc->AddOptionalKey
        ("db", "Database", // was "-d" in `idfetch'
         "Database to use",
         CArgDescriptions::eString);

    // Entity number
    arg_desc->AddOptionalKey
        ("ent", "EntityNumber", // was "-e" in `idfetch'
         "(Sub)entity number (retrieval number) to dump",
         CArgDescriptions::eInteger);
    arg_desc->SetConstraint
        ("ent", new CArgAllow_Integers(0, 99999999));

    // Type of lookup
    arg_desc->AddDefaultKey
        ("lt", "LookupType", // combination of "-i" (!) and "-n" in `idfetch'
         "Type of lookup",
         CArgDescriptions::eString, "entry");
    arg_desc->SetConstraint
        ("lt", &(*new CArgAllow_Strings,
                 "entry", "state", "ids", "history", "revisions", "none"));

    // File with list of stuff to dump
    arg_desc->AddOptionalKey
        ("in", "RequestFile", // was "-G" (!) in `idfetch'
         "File with list of GIs, (versioned) accessions, FASTA SeqIDs to dump",
         CArgDescriptions::eInputFile, CArgDescriptions::fPreOpen);

    // Maximum complexity
    arg_desc->AddDefaultKey
        ("maxplex", "MaxComplexity", // was "-c" in `idfetch'
         "Maximum complexity to return",
         CArgDescriptions::eString, "entry");
    arg_desc->SetConstraint
        ("maxplex", &(*new CArgAllow_Strings,
                      "entry", "bioseq", "bioseq-set", "nuc-prot",
                      "pub-set"));

    // Flattened SeqID
    arg_desc->AddOptionalKey
        ("flat", "FlatID", // was "-f" in `idfetch'
         "Flattened SeqID; format can be\n"
         "\t'type([name][,[accession][,[release][,version]]])'"
         " [e.g., '5(HUMHBB)'],\n"
         "\ttype=accession[.version], or type:number",
         CArgDescriptions::eString);

    // FASTA-style SeqID
    arg_desc->AddOptionalKey
        ("fasta", "FastaID", // was "-s" in `idfetch'
         "FASTA-style SeqID, in the form \"type|data\"; choices are\n"
         "\tlcl|int lcl|str bbs|int bbm|int gim|int gb|acc|loc emb|acc|loc\n"
         "\tpir|acc|name sp|acc|name pat|country|patent|seq ref|acc|name|rel\n"
         "\tgnl|db|id gi|int dbj|acc|loc prf|acc|name pdb|entry|chain\n"
         "\ttpg|acc|name tpe|acc|name tpd|acc|name",
         CArgDescriptions::eString);

    // Generate GI list by Entrez query
    arg_desc->AddOptionalKey
        ("query", "EntrezQueryString", // was "-q" in `idfetch'
         "Generate GI list by Entrez query given on command line",
         CArgDescriptions::eString);
    arg_desc->AddOptionalKey
        ("qf", "EntrezQueryFile", // was "-Q" in `idfetch'
         "Generate GI list by Entrez query in given file",
         CArgDescriptions::eInputFile, CArgDescriptions::fPreOpen);

    // Program description
    string prog_description =
        "Fetch SeqEntry from ID server by its GI ID, possibly obtained from\n"
        "its SeqID or an Entrez query";
    arg_desc->SetUsageContext(GetArguments().GetProgramBasename(),
                              prog_description, false);

    arg_desc->AddDefaultKey
        ("repeat", "repeat", "Repeat fetch number of times",
         CArgDescriptions::eInteger, "1");

    // Pass argument descriptions to the application
    //
    SetupArgDescriptions(arg_desc.release());
}


// Workaround for "replace_if"
//
// Returns true when `c' is a control character as reported by iscntrl().
// The value is converted to unsigned char before the call: handing
// iscntrl() a negative value other than EOF (which happens for bytes
// >= 0x80 wherever plain char is signed) is undefined behavior.
inline bool s_IsControl(char c)
{
    return iscntrl(static_cast<unsigned char>(c)) != 0;
}


int CId1FetchApp::Run(void)
{
    // Process command line args
    const CArgs& args = GetArgs();

    // Setup and tune logging facilities
    if ( args["log"] ) {
        SetDiagStream( &args["log"].AsOutputFile() );
    }
#ifdef _DEBUG
    // SetDiagTrace(eDT_Enable);
    SetDiagPostLevel(eDiag_Info);
    SetDiagPostFlag(eDPF_All);
#endif

    // Make sure the combination of arguments is valid
    {{
        int id_count = 0;
        const string& fmt = args["fmt"].AsString();

        if (args["gi"]) id_count++;
        if (args["in"]) id_count++;
        if (args["flat"]) id_count++;
        if (args["fasta"]) id_count++;
        if (args["query"]) id_count++;
        if (args["qf"]) id_count++;

        if (id_count != 1) {
            NCBI_THROW(CArgException,eNoArg,
                       "You must supply exactly one argument"
                       " indicating what to look up.");
        }
        if ((args["query"] || args["qf"] || fmt == "docsum")
            && !args["db"]) {
            ERR_POST("No Entrez database supplied. Try -db Nucleotide or "
                     "-db Protein.");
            return -1;
        }
        if ((fmt == "genbank" || fmt == "genpept" || fmt == "quality")
            && args["lt"].AsString() != "entry") {
            ERR_POST("The output format '" << fmt
                     << "' is only available for Seq-Entries.");
            return -1;
        }
    }}

    // Setup application registry, error log, and MT-lock for CONNECT library
    CONNECT_Init(&GetConfig());

    m_E2Client.SetDefaultRequest().SetTool("id1_fetch");

    // Open output file
    m_OutputFile = &args["out"].AsOutputFile();

    // Set up object manager
    m_ObjMgr = new CObjectManager;
    m_Scope = new CScope(*m_ObjMgr);
    m_ObjMgr->RegisterDataLoader( *new CGBDataLoader("GENBANK"));
    m_Scope->AddDataLoader("GENBANK");

    int repeat = args["repeat"].AsInteger();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -