genemark_loader.cpp
来自「ncbi源码」· C++ 代码 · 共 564 行 · 第 1/2 页
CPP
564 行
/*
 * ===========================================================================
 * PRODUCTION $Log: genemark_loader.cpp,v $
 * PRODUCTION Revision 1000.5  2004/06/01 20:58:40  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.35
 * PRODUCTION
 * ===========================================================================
 */

/*  $Id: genemark_loader.cpp,v 1000.5 2004/06/01 20:58:40 gouriano Exp $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Authors:  Dmitry Dernovoy
 *
 * File Description:
 *    CGeneMarkLoader - Plugin to load GeneMark's predictions
 */

#include <ncbi_pch.hpp>
#include "genemark_loader.hpp"
#include <gui/core/idocument.hpp>
#include <gui/core/version.hpp>
#include <gui/dialogs/file_browser.hpp>
#include <gui/plugin/PluginCommandSet.hpp>
#include <gui/plugin/PluginInfo.hpp>
#include <gui/plugin/PluginValue.hpp>

#include <objects/general/Int_fuzz.hpp>
#include <objects/general/Object_id.hpp>
#include <objects/seqfeat/Cdregion.hpp>
#include <objects/seqfeat/Feat_id.hpp>
#include <objects/seqfeat/Genetic_code.hpp>
#include <objects/seqfeat/SeqFeatData.hpp>
#include <objects/seqfeat/Seq_feat.hpp>
#include <objects/seqloc/Seq_interval.hpp>
#include <objects/seqloc/Seq_loc.hpp>

// Size of the line buffer used when reading prediction files; getline() is
// asked for at most this many characters (terminator included) per line.
#define GENEMARK_MAXLINE 200


BEGIN_NCBI_SCOPE


// Fill in the plugin description: version, build stamp, class name, menu
// entry and help text, and register a single "import" data command that
// takes one "document" argument.
void CGeneMarkLoader::GetInfo(CPluginInfo& info)
{
    info.Reset();

    // version info macro
    info.SetInfo(CPluginVersion::eMajor, CPluginVersion::eMinor, 0,
                 string(__DATE__) + " " + string(__TIME__),
                 "CGeneMarkLoader",
                 "GeneMark\\/Glimmer output",
                 "Load the results from a GeneMark/Glimmer run",
                 "");

    // command info
    CPluginCommandSet& cmds        = info.SetCommands();
    CPluginCommand&    import_args = cmds.AddDataCommand(eDataCommand_import);
    import_args.AddArgument("document", "Document", CPluginArg::eDocument);
}


CGeneMarkLoader::CGeneMarkLoader()
{
}


CGeneMarkLoader::~CGeneMarkLoader()
{
}


// Handle the "import" command.
//
// Takes the document from the request, asks the user for a prediction file
// through the file browser, detects the file format and loads it into a
// CSeq_annot which is attached to the reply for the framework to add to the
// document.
//
// Reply status:
//   eMessageStatus_success - annotation loaded and queued for the document
//   eMessageStatus_ignored - the file browser returned an empty file name
//   eMessageStatus_failed  - every other outcome (set up front, so all early
//                            returns and caught exceptions report failure)
void CGeneMarkLoader::Import(CPluginMessage& msg)
{
    const CPluginCommand& args  = msg.GetRequest().GetCommand();
    CPluginReply&         reply = msg.SetReply();
    reply.SetStatus(eMessageStatus_failed);

    LOG_POST(Info << "CGeneMarkLoader::Load: start point.");
    IDocument* doc = const_cast<IDocument*> (&args["document"].AsDocument());
    if ( !doc ) {
        reply.SetStatus(eMessageStatus_failed);
        return;
    }

    try {
        LOG_POST(Info << "CGeneMarkLoader::Load: reading file...");
        CConstRef<CSeq_id> id(dynamic_cast<const CSeq_id*> (doc->GetObject()));
        if ( !id ) {
            _TRACE("CGeneMarkLoader::Load: can't get any reasonable seq_id");
            return;
        }

        string acc_text = id->GetSeqIdString(false) ;
        // LOG_POST( Info << "CGeneMarkLoader:: accession: " << acc_text );
        // _TRACE("CGeneMarkLoader:: accession: " << acc_text );

        // File-browser filter built from the upper-cased accession.
        string tmp_str = string("(") + NStr::ToUpper(acc_text) + string(".fna.g*)");
        //"(*.{gmHMM,gmark,glim_*})"
        string fname = NcbiFileBrowser("Open GeneMark/GeneMark.hmm/Glimmer output file...",
                                       tmp_str.c_str(),
#if defined(NCBI_OS_MSWIN)
                                       "\\\\Atlas\\b11\\tatiana\\Predictions\\");
#else
                                       "/net/atlas/b11/tatiana/Predictions/");
#endif
        if ( fname.empty() ) {
            reply.SetStatus(eMessageStatus_ignored);
            return;
        }

        CBioseq_Handle handle = doc->GetScope().GetBioseqHandle(*id);
        if ( !handle ) {
            _TRACE("CGeneMarkLoader::Load: can't get bioseq handle");
            return;
        }

        CRef<CSeq_annot> annot;
        switch (x_RecognizeFormat(fname)) {
        case fGeneMark:
            annot = x_LoadGeneMarkFile(fname, *id);
            break;

        case fGeneMarkHMM:
            annot = x_LoadGeneMarkHmmFile(fname, *id);
            break;

        case fGlimmer2:
            annot = x_LoadGlimmer2File(fname, *id);
            break;

        default:
            _TRACE("Unknown return code");
            // intentional fall-through: an unexpected code is reported the
            // same way as an unrecognized file
        case fUnknownFormat:
            _TRACE("Unsupported file format, file: "<< fname);
            reply.SetStatus(eMessageStatus_failed);
            return;
        }

        // save the object in the reply for framework processing
        reply.AddObject(*doc, *annot);
        reply.AddAction(CPluginReplyAction::e_Add_to_document);
        reply.SetStatus(eMessageStatus_success);
    }
    catch (const CException& e) {
        LOG_POST(Info << "failed to read GeneMark file: " << e.what());
        _TRACE("failed to read GeneMark file: " << e.what() );
    }
#ifndef _DEBUG
    catch (...) {
        _TRACE("failed to read GeneMark file: unknown error");
    }
#endif
}


// Guess the format of a prediction file from its first lines.
//
// @param fname  path of the file to inspect
// @return fGlimmer2      if the first line contains "r=-1." or "r=-0."
//         fGeneMark      if one of the next 29 lines contains both "Strand"
//                        and "Frame"
//         fGeneMarkHMM   if one of the next 29 lines contains "Strand" and
//                        "RightEnd" (and not "Frame")
//         fUnknownFormat otherwise, including when the file cannot be opened
CGeneMarkLoader::EFileFormat
CGeneMarkLoader::x_RecognizeFormat(const string& fname)
{
    CNcbiIfstream istr( fname.c_str() );
    if ( !istr ) {
        return fUnknownFormat;
    }

    // Zero-initialized so strstr() always sees a terminated string, even if
    // the very first read stores nothing.
    char buf[ GENEMARK_MAXLINE + 1 ] = "";

    istr.getline(buf, GENEMARK_MAXLINE);
    if ( strstr(buf, "r=-1.")  ||  strstr(buf, "r=-0.") ) {
        return fGlimmer2;
    }

    // Stop as soon as a read fails instead of spinning on a dead stream.
    for (int i = 1;  (i < 30)  &&  istr.getline(buf, GENEMARK_MAXLINE);  ++i) {
        if ( !strstr(buf, "Strand") ) {
            continue;
        }
        if ( strstr(buf, "Frame") ) {
            return fGeneMark;
        }
        if ( strstr(buf, "RightEnd") ) {
            return fGeneMarkHMM;
        }
    }
    return fUnknownFormat;
}


// Load a Glimmer2 output file into a feature table.
//
// Each line is parsed as: gene number, first coordinate, second coordinate,
// followed by text in which the first '+' or '-' gives the strand.  For each
// accepted line one coding-region feature is created on `id` with frame one
// and genetic code id 11; the interval is widened by 3 on the far end
// (to - 3 on minus, to + 3 on plus) and shifted by -1 when stored.
//
// Lines are skipped when
//   - no '+' or '-' can be found (blank, header or malformed lines);
//   - the resulting left end is below 1 or greater than the right end
//     (logged as a prediction over the zero-point).
//
// Reading stops at the first line getline() cannot deliver (end of file, or a
// line that does not fit into the buffer).
//
// @param fname  path of the Glimmer2 output file
// @param id     sequence id assigned to every feature location
// @return annotation named "Glimmer2 predictions"; its feature table is empty
//         when nothing could be read
CRef<CSeq_annot>
CGeneMarkLoader::x_LoadGlimmer2File(const string& fname, const CSeq_id& id)
{
    CNcbiIfstream istr(fname.c_str());

    CRef<CSeq_annot> annot( new CSeq_annot() );
    annot->AddName("Glimmer2 predictions");
    list< CRef<CSeq_feat> >& ftable = annot->SetData().SetFtable();

    char buf[ GENEMARK_MAXLINE + 1 ];
    while ( istr.getline(buf, GENEMARK_MAXLINE) ) {
        CNcbiIstrstream Lstr( buf );

        int gene_number = 0, glim_from = 0, glim_to = 0;
        Lstr >> gene_number;
        Lstr >> glim_from;
        Lstr >> glim_to;

        // Scan forward for the strand sign.  The scan must end when the line
        // is exhausted: a failed extraction leaves `strand` untouched, so
        // looping on the character alone would never terminate.
        char strand = 'U';
        while ( (strand != '-')  &&  (strand != '+')  &&  (Lstr >> strand) ) {
        }
        if ( (strand != '-')  &&  (strand != '+') ) {
            continue;   // not a prediction line
        }

        // Work in signed arithmetic so that "to - 3" and the later "- 1"
        // cannot wrap around before the range check.
        int left  = 0;
        int right = 0;
        if (strand == '-') {
            left  = glim_to - 3;
            right = glim_from;
        } else { // direct
            left  = glim_from;
            right = glim_to + 3;
        }

        if ( (left < 1)  ||  (left > right) ) {
            // Glimmer prediction over zero-point: can't handle yet
            LOG_POST(Info << "CGeneMarkLoader::Can't handle Glimmer's prediction over zero-point, skipped.");
            continue;
        }

        const TSeqPos LeftEnd  = static_cast<TSeqPos>(left);
        const TSeqPos RightEnd = static_cast<TSeqPos>(right);

        CRef<CSeq_feat> feat(new CSeq_feat());
        feat->SetComment() = "Glimmer2 pred #" + NStr::IntToString(gene_number) ;

        CSeq_interval& floc = feat->SetLocation().SetInt();
        floc.SetFrom(LeftEnd - 1);
        floc.SetTo(RightEnd - 1);
        floc.SetStrand((strand == '-') ? eNa_strand_minus : eNa_strand_plus);
        floc.SetId().Assign(id);
        // floc.SetId().SetGi( NStr::StringToInt(m_SeqId) );

        CSeqFeatData& fdata = feat->SetData();
        CCdregion&    cdreg = fdata.SetCdregion();
        cdreg.SetFrame(CCdregion::eFrame_one);

        list< CRef< CGenetic_code::C_E > >& gcode = cdreg.SetCode().Set();
        CRef< CGenetic_code::C_E > ce(new CGenetic_code::C_E);
        ce->SetId(11); // TSE=1; seq=1; feat=1
        gcode.push_back(ce);

        ftable.push_back(feat);
    }
    return annot;
}


// NOTE: the body of this function continues beyond the end of this excerpt.
CRef<CSeq_annot>
CGeneMarkLoader::x_LoadGeneMarkHmmFile(const string& fname, const CSeq_id& id)
{
    CNcbiIfstream istr(fname.c_str());
    CRef<CSeq_annot> annot( new CSeq_annot() );
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?