📄 id1_fetch.cpp
字号:
for ( int pass = 0; pass < repeat; ++pass ) { if (args["gi"]) { if ( !LookUpGI(args["gi"].AsInteger()) ) return -1; } if (args["fasta"]) { int gi = LookUpFastaSeqID(args["fasta"].AsString()); if (gi <= 0 || !LookUpGI(gi)) { return -1; } } if (args["flat"]) { int gi = LookUpFlatSeqID(args["flat"].AsString()); if (gi <= 0 || !LookUpGI(gi)) { return -1; } } if (args["in"]) { CNcbiIstream& is = args["in"].AsInputFile(); while (is && !is.eof()) { string id; int gi; is >> id; if (id.empty()) { break; } if (id.find('|') != NPOS) { gi = LookUpFastaSeqID(id); } else if (id.find_first_of(":=(") != NPOS) { gi = LookUpFlatSeqID(id); } else { gi = NStr::StringToInt(id); } if (gi <= 0 || !LookUpGI(gi)) { return -1; } } } if (args["query"] || args["qf"]) { // Form query CRef<CEntrez2_boolean_element> e2_element (new CEntrez2_boolean_element); if (args["query"]) { e2_element->SetStr(args["query"].AsString()); } else { CNcbiIstream& is = args["qf"].AsInputFile(); CNcbiOstrstream oss; oss << is.rdbuf(); string& str = e2_element->SetStr(); str.assign(oss.str(), oss.pcount()); oss.freeze(false); replace_if(str.begin(), str.end(), s_IsControl, ' '); } // Make the actual query CRef<CEntrez2_boolean_reply> reply; {{ CEntrez2_eval_boolean eb; eb.SetReturn_UIDs(true); CEntrez2_boolean_exp& query = eb.SetQuery(); query.SetExp().push_back(e2_element); query.SetDb() = CEntrez2_db_id(args["db"].AsString()); reply = m_E2Client.AskEval_boolean(eb); }} if ( !reply->GetCount() ) { ERR_POST("Entrez query returned no results."); return -1; } // Query succeeded; proceed to next stage of lookup for (CEntrez2_id_list::TConstUidIterator it = reply->GetUids().GetConstUidIterator(); !it.AtEnd(); ++it) { if ( !LookUpGI(*it) ) { return -1; } } } } return 0;}bool CId1FetchApp::LookUpGI(int gi){ const CArgs& args = GetArgs(); const string& fmt = args["fmt"].AsString(); const string& lt = args["lt"].AsString(); CConstRef<CSerialObject> reply_object; bool use_objmgr = false; if (lt == "none") { *m_OutputFile << gi << NcbiEndl; return true; // Done } else if (fmt == "docsum") { // Handling this here costs some efficiency when the GI came // from an Entrez query in the first place, but wins on generality. CEntrez2_id_list uids; uids.SetDb() = CEntrez2_db_id(args["db"].AsString()); uids.SetNum(1); uids.SetUids().resize(uids.sm_UidSize); {{ CEntrez2_id_list::TUidIterator it = uids.GetUidIterator(); *it = gi; }} CRef<CEntrez2_docsum_list> docs = m_E2Client.AskGet_docsum(uids); if ( !docs->GetCount() ) { ERR_POST("Entrez query returned no results."); return false; } string caption, title; for (CTypeConstIterator<CEntrez2_docsum_data> it = ConstBegin(*docs); it; ++it) { // Should this be case-insensitive? if (it->GetField_name() == "Caption") { caption = it->GetField_value(); } else if (it->GetField_name() == "Title") { title = it->GetField_value(); } } *m_OutputFile << '>'; if ( !caption.empty() ) { *m_OutputFile << caption; } *m_OutputFile << ' '; if ( !title.empty() ) { *m_OutputFile << title; } } else if (lt == "entry") { use_objmgr = true; } else if (lt == "state") { CRef<CID1server_back> id1_reply(new CID1server_back); int state = m_ID1Client.AskGetgistate(gi, id1_reply); if (fmt == "fasta") { *m_OutputFile << "gi = " << gi << ", states: "; switch (state & 0xff) { case 0: *m_OutputFile << "NONEXISTENT"; break; // was "NOT EXIST" case 10: *m_OutputFile << "DELETED"; break; case 20: *m_OutputFile << "REPLACED"; break; case 40: *m_OutputFile << "LIVE"; break; default: *m_OutputFile << "UNKNOWN"; break; } if (state & 0x100) { *m_OutputFile << "|SUPPRESSED"; } if (state & 0x200) { *m_OutputFile << "|WITHDRAWN"; } if (state & 0x400) { *m_OutputFile << "|CONFIDENTIAL"; } } else { reply_object = id1_reply; } } else if (lt == "ids") {#if 1 CRef<CID1server_back> id1_reply(new CID1server_back); CID1server_back::TIds ids = m_ID1Client.AskGetseqidsfromgi(gi, id1_reply); if (fmt == "fasta") { WriteFastaIDs(ids); } else { reply_object = id1_reply; }#else use_objmgr = true;#endif } else if (lt == "history" || lt == "revisions") { CRef<CID1server_back> id1_reply(new CID1server_back); // ignore result -- it's simpler to use id1_reply if (lt == "history") { m_ID1Client.AskGetgihist(gi, id1_reply); } else { m_ID1Client.AskGetgirev(gi, id1_reply); } if (fmt == "fasta") { WriteHistoryTable(*id1_reply); } else { reply_object = id1_reply; } } CBioseq_Handle handle; if (use_objmgr) { // What about db, ent, and maxplex? CSeq_id id; id.SetGi(gi); handle = m_Scope->GetBioseqHandle(id); if ( !handle ) { ERR_POST(Fatal << "Bioseq not found: " << id.DumpAsFasta()); } reply_object.Reset(&handle.GetTopLevelSeqEntry()); } // Dump server response in the specified format ESerialDataFormat format = eSerial_None; if (fmt == "asn") { format = eSerial_AsnText; } else if (fmt == "asnb") { format = eSerial_AsnBinary; } else if (fmt == "xml") { format = eSerial_Xml; } else if (fmt == "fasta" && lt == "ids") { if (use_objmgr) { WriteFastaIDs(handle.GetBioseq().GetId()); } } else if (fmt == "fasta" && lt == "entry") { CFastaOstream out(*m_OutputFile); out.SetFlag(CFastaOstream::eAssembleParts); out.Write(handle); } else if (fmt == "quality") { WriteQualityScores(handle); } else if (fmt == "genbank" || fmt == "genpept") { bool gp = fmt == "genpept"; const CSeq_entry& entry = handle.GetTopLevelSeqEntry();#if 1 CFlatNCBIFormatter formatter(*new CFlatTextOStream(*m_OutputFile), *m_Scope, IFlatFormatter::eMode_Entrez); formatter.Format(entry, formatter, gp ? IFlatFormatter::fSkipNucleotides : IFlatFormatter::fSkipProteins);#else CGenbankWriter(*m_OutputFile, *m_Scope, gp ? CGenbankWriter::eFormat_Genpept : CGenbankWriter::eFormat_Genbank) .Write(entry);#endif } if (reply_object.NotEmpty() && format != eSerial_None) { auto_ptr<CObjectOStream> asn_output (CObjectOStream::Open(format, *m_OutputFile)); // *asn_output << *reply_object; asn_output->Write(reply_object, reply_object->GetThisTypeInfo()); } if (fmt != "asnb") { *m_OutputFile << NcbiEndl; } return true; // Done}// Cleanupvoid CId1FetchApp::Exit(void){ SOCK_ShutdownAPI(); SetDiagStream(0);}int CId1FetchApp::LookUpFastaSeqID(const string& s){ CSeq_id id(s); return m_ID1Client.AskGetgi(id);}int CId1FetchApp::LookUpFlatSeqID(const string& s){ CSeq_id::E_Choice type = static_cast<CSeq_id::E_Choice>(atoi(s.c_str())); SIZE_TYPE pos = s.find_first_of(":=("); if (pos == NPOS) { THROW0_TRACE(runtime_error("Malformatted flat ID " + s)); } string data = s.substr(pos + 1); switch (s[pos]) { case ':': case '=': { CSeq_id id(type, data, kEmptyStr); return m_ID1Client.AskGetgi(id); } case '(': { data.erase(data.end() - 1); // remove last character, which should be ')' vector<string> pieces; NStr::Tokenize(data, ",", pieces); pieces.resize(4, kEmptyStr); // name acc rel ver -> acc name ver rel CSeq_id id(type, pieces[1], pieces[0], pieces[3], pieces[2]); return m_ID1Client.AskGetgi(id); } default: // can't happen, but shut the compiler up return -1; }}void CId1FetchApp::WriteFastaIDs(const list< CRef< CSeq_id > >& ids){ ITERATE (list< CRef< CSeq_id > >, it, ids) { if (it != ids.begin()) { *m_OutputFile << '|'; } (*it)->WriteAsFasta(*m_OutputFile); }}// for formatting textclass CTextColumn{public: CTextColumn() : m_Width(0) { } CTextColumn& Add(string s) { m_Strings.push_back(s); if (s.size() > m_Width) m_Width = s.size(); return *this; } string Get(unsigned int index) const { const string& s = m_Strings[index];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -