file_loader.cpp
来自「ncbi源码」· C++ 代码 · 共 1,097 行 · 第 1/3 页
CPP
1,097 行
CPluginReply& reply){ CNcbiIfstream input(fname.c_str(), IOS_BASE::in | IOS_BASE::binary); x_LoadStream(input, user_fmt, doc, reply);}//// x_LoadStream()// This is the main internal workhorse function. This function is responsible// for trying a number of different known formats using a number of different// data types. The attempt is to support a core level of file read that covers// a wide range of expected types. Currently supported are: ASN.1 (text and// binary) and XML format of Seq-entry / Bioseq/ Bioseq-set / Seq-annot /// Seq-feat / Seq-align; FastA format; Newick phylogenetic tree format;// and various sequence alignment formats.//void CDataPlugin_FileLoader::x_LoadStream(CNcbiIstream& input, EFormat user_fmt, IDocument* doc, CPluginReply& reply){ CT_POS_TYPE orig_pos = input.tellg(); CFormatGuess::EFormat fmt = CFormatGuess::eUnknown; // adjust 'fmt' based on user preferences switch (user_fmt) { case eFormat_AsnBinary: fmt = CFormatGuess::eBinaryASN; break; case eFormat_AsnText: fmt = CFormatGuess::eTextASN; break; case eFormat_XML: fmt = CFormatGuess::eXml; break; case eFormat_FastA: fmt = CFormatGuess::eFasta; break; case eFormat_TextAlign: case eFormat_NewickTree: fmt = CFormatGuess::eUnknown; break; default: case eFormat_Autodetect: {{ CFormatGuess fg; fmt = fg.Format(input); }} break; } // // now, proceed! // switch (fmt) { case CFormatGuess::eBinaryASN: case CFormatGuess::eTextASN: case CFormatGuess::eXml: // // ASN and XML go through the standard serialization hooks // {{ ESerialDataFormat sfmt = FormatGuess2Serial(fmt); CSniffReader sniffer(*this, doc, reply); auto_ptr<CObjectIStream> sinput(CObjectIStream::Open(sfmt, input)); sniffer.Probe(*sinput); reply.SetStatus(eMessageStatus_success); }} break; case CFormatGuess::eFasta: // // FASTA uses a specialized reader // try { CRef<CSeq_entry> entry = ReadFasta(input, fReadFasta_AssumeNuc); if ( !entry ) { NCBI_THROW(CDataPlugin_FileLoaderException, eInvalidFormat, "file is not FASTA format"); } if (doc) { doc->GetScope().AddTopLevelSeqEntry(*entry); reply.AddObject(*doc, *entry); } else { CRef<CScope> scope(new CScope(CDocManager::GetObjectManager())); scope->AddTopLevelSeqEntry(*entry); scope->AddDefaults(); IDocument* doc = CDocManager::CreateDocument(*scope, *entry); reply.AddObject(*doc); } reply.SetStatus(eMessageStatus_success); LOG_POST(Info << "Imported file as FASTA sequence"); } catch (CException& _DEBUG_ARG(e)) { _TRACE("failed to read FastA: " << e.what()); } catch (runtime_error& _DEBUG_ARG(e)) { _TRACE("failed to read FastA: " << e.what()); }#ifndef _DEBUG catch (...) { _TRACE("failed to read FastA: unknown error"); }#endif break; case CFormatGuess::eUnknown: // // try text alignment formats // if (user_fmt == eFormat_TextAlign || user_fmt == eFormat_Autodetect) { try { // try text alignment CRef<CSeq_entry> entry; const size_t max_alphas = 2; const CAlnReader::EAlphabet alphas[max_alphas] = { CAlnReader::eAlpha_Nucleotide, CAlnReader::eAlpha_Protein }; for (size_t i = 0; i < max_alphas && !entry; ++i) { try { input.clear(); input.seekg(orig_pos); CAlnReader reader(input); reader.SetClustal(alphas[i]); reader.Read(); entry = reader.GetSeqEntry(); } catch (CObjReaderParseException& e) { _TRACE("clustal nucleotide failed: " << e.what()); } } if ( !entry ) { NCBI_THROW(CException, eUnknown, "failed to read as clustal alignment"); } if (doc) { doc->GetScope().AddTopLevelSeqEntry(*entry); reply.AddObject(*doc, *entry); } else { CRef<CScope> scope(new CScope(CDocManager::GetObjectManager())); scope->AddTopLevelSeqEntry(*entry); scope->AddDefaults(); IDocument* doc = CDocManager::CreateDocument(*scope, *entry); reply.AddObject(*doc); } reply.SetStatus(eMessageStatus_success); return; } catch (CException& e) { _TRACE("failed to read text alignment: " << e.what()); } catch (exception& e) { _TRACE("failed to read text alignment: " << e.what()); }#ifndef _DEBUG catch (...) { _TRACE("failed to read text alignment: unknown error"); }#endif } // // try Newick format phylogenetic tree // if (user_fmt == eFormat_NewickTree || user_fmt == eFormat_Autodetect) { try { // load the tree input.clear(); input.seekg(orig_pos); auto_ptr<TPhyTreeNode> tree(ReadNewickTree(input)); // make a serial object CRef<CPhyTreeSerial> stree(new CPhyTreeSerial(*tree)); // make a document containing this CRef<CScope> scope(new CScope(CDocManager::GetObjectManager())); scope->AddDefaults(); IDocument* new_doc = CDocManager::CreateDocument(*scope, *stree); reply.AddObject(*new_doc); reply.SetStatus(eMessageStatus_success); return; } catch (CException& e) { _TRACE("failed to read Newick file: " << e.what()); } catch (exception& e) { _TRACE("failed to read Newick file: " << e.what()); }#ifndef _DEBUG catch (...) { }#endif } LOG_POST(Error << "failed to load file. Unknown format."); break; default: break; }}void CDataPlugin_FileLoader::Load(const CObjectInfo& info, IDocument* doc, CPluginReply& reply){ try { // we explore a series of objects in descending order of size / // data model coverage {{ CSeq_submit* seq_submit = CType<CSeq_submit>().Get(info); if (seq_submit) { x_Load(*seq_submit, doc, reply); return; } }} {{ CSeq_entry* seq_entry = CType<CSeq_entry>().Get(info); if (seq_entry) { x_Load(*seq_entry, doc, reply); return; } }} {{ CBioseq_set* bioseq_set = CType<CBioseq_set>().Get(info); if (bioseq_set) { x_Load(*bioseq_set, doc, reply); return; } }} {{ CBioseq* bioseq = CType<CBioseq>().Get(info); if (bioseq) { x_Load(*bioseq, doc, reply); return; } }} {{ CSeq_annot* annot = CType<CSeq_annot>().Get(info); if (annot) { x_Load(*annot, doc, reply); return; } }} {{ CSeq_align* align = CType<CSeq_align>().Get(info); if (align) { x_Load(*align, doc, reply); return; } }} {{ CSeq_align_set* align = CType<CSeq_align_set>().Get(info); if (align) { x_Load(*align, doc, reply); return; } }} } catch (CException& _DEBUG_ARG(e)) { _TRACE("failed to open document: " << e.what()); }}//// x_Load() - load a given record into a document//void CDataPlugin_FileLoader::x_Load(CSeq_entry& entry, IDocument* doc, CPluginReply& reply){ if (doc) { doc->GetScope().AddTopLevelSeqEntry(entry); reply.AddObject(*doc, entry); } else { CRef<CScope> scope(new CScope(CDocManager::GetObjectManager())); scope->AddTopLevelSeqEntry(entry); scope->AddDefaults(); doc = CDocManager::CreateDocument(*scope, entry); reply.AddObject(*doc); } reply.SetStatus(eMessageStatus_success); LOG_POST(Info << "Imported Seq-entry");}//// x_Load() - load a given record into a document// This version adds a given alignment to the current record, or creates a new// one as needed.//void CDataPlugin_FileLoader::x_Load(CSeq_align& align, IDocument* doc, CPluginReply& reply){ CObjectConverter::TObjList objs; CRef<CScope> scope; if (doc) { scope = &doc->GetScope(); } else { scope = (new CScope(CDocManager::GetObjectManager())); scope->AddDefaults(); } CObjectConverter::Convert(*scope, align, CSeq_annot::GetTypeInfo(), objs); NON_CONST_ITERATE (CObjectConverter::TObjList, iter, objs) { CSeq_annot& annot = const_cast<CSeq_annot&>(dynamic_cast<const CSeq_annot&>(**iter)); x_Load(annot, doc, reply); }}void CDataPlugin_FileLoader::x_Load(CSeq_align_set& align, IDocument* doc, CPluginReply& reply){ CObjectConverter::TObjList objs; CRef<CScope> scope; if (doc) { scope = &doc->GetScope(); } else { scope = (new CScope(CDocManager::GetObjectManager())); scope->AddDefaults(); } CObjectConverter::Convert(*scope, align, CSeq_annot::GetTypeInfo(), objs); NON_CONST_ITERATE (CObjectConverter::TObjList, iter, objs) { CSeq_annot& annot = const_cast<CSeq_annot&>(dynamic_cast<const CSeq_annot&>(**iter)); x_Load(annot, doc, reply); }
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?