genemark_loader.cpp

来自「ncbi源码」· C++ 代码 · 共 564 行 · 第 1/2 页

CPP
564
字号
/*
 * ===========================================================================
 * PRODUCTION $Log: genemark_loader.cpp,v $
 * PRODUCTION Revision 1000.5  2004/06/01 20:58:40  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.35
 * PRODUCTION
 * ===========================================================================
 */

/*  $Id: genemark_loader.cpp,v 1000.5 2004/06/01 20:58:40 gouriano Exp $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Authors: Dmitry Dernovoy
 *
 * File Description:
 *   CGeneMarkLoader - Plugin to load GeneMark's predictions
 */

#include <ncbi_pch.hpp>
#include "genemark_loader.hpp"
#include <gui/core/idocument.hpp>
#include <gui/core/version.hpp>
#include <gui/dialogs/file_browser.hpp>
#include <gui/plugin/PluginCommandSet.hpp>
#include <gui/plugin/PluginInfo.hpp>
#include <gui/plugin/PluginValue.hpp>
#include <objects/general/Int_fuzz.hpp>
#include <objects/general/Object_id.hpp>
#include <objects/seqfeat/Cdregion.hpp>
#include <objects/seqfeat/Feat_id.hpp>
#include <objects/seqfeat/Genetic_code.hpp>
#include <objects/seqfeat/SeqFeatData.hpp>
#include <objects/seqfeat/Seq_feat.hpp>
#include <objects/seqloc/Seq_interval.hpp>
#include <objects/seqloc/Seq_loc.hpp>

// Size of the fixed line buffers used by the loaders in this file.
#define            GENEMARK_MAXLINE           200

BEGIN_NCBI_SCOPE


/// Describe this plugin to the framework.
///
/// Resets @a info, stores the version/build stamp and the plugin's
/// identifying strings, and registers a single "import" data command that
/// takes one argument named "document".
///
/// @param info  Plugin descriptor to fill in; any previous content is reset.
void CGeneMarkLoader::GetInfo(CPluginInfo& info)
{
    info.Reset();

    // version info macro
    info.SetInfo(CPluginVersion::eMajor, CPluginVersion::eMinor, 0,
                 string(__DATE__) + " " + string(__TIME__),
                 "CGeneMarkLoader",
                 "GeneMark\\/Glimmer output",
                 "Load the results from a GeneMark/Glimmer run", "");

    // command info
    CPluginCommandSet& cmds     = info.SetCommands();
    CPluginCommand& import_args = cmds.AddDataCommand(eDataCommand_import);
    import_args.AddArgument("document", "Document", CPluginArg::eDocument);
}


CGeneMarkLoader::CGeneMarkLoader()
{
}


CGeneMarkLoader::~CGeneMarkLoader()
{
}


/// Handle the "import" command: ask the user for a prediction file and turn
/// it into a feature-table annotation attached to the request's document.
///
/// The reply status starts as eMessageStatus_failed and is changed to
///  - eMessageStatus_ignored  when the file browser returns an empty name,
///  - eMessageStatus_success  once the annotation has been added to the
///    reply together with an e_Add_to_document action.
/// Every other exit path (no Seq-id, no bioseq handle, unrecognised file
/// format, exception) leaves the status at "failed".
///
/// @param msg  Plugin message carrying the request (with its "document"
///             argument) and receiving the reply.
void CGeneMarkLoader::Import(CPluginMessage& msg)
{
    const CPluginCommand& args = msg.GetRequest().GetCommand();
    CPluginReply& reply = msg.SetReply();
    reply.SetStatus(eMessageStatus_failed);

    LOG_POST(Info << "CGeneMarkLoader::Load: start point.");

    IDocument* doc = const_cast<IDocument*> (&args["document"].AsDocument());
    if ( !doc ) {
        reply.SetStatus(eMessageStatus_failed);
        return;
    }

    try {
        LOG_POST(Info << "CGeneMarkLoader::Load: reading file...");

        // The loader only works on documents whose object is a Seq-id.
        CConstRef<CSeq_id> id(dynamic_cast<const CSeq_id*> (doc->GetObject()));
        if ( !id ) {
            _TRACE("CGeneMarkLoader::Load: can't get any reasonable seq_id");
            return;
        }

        string acc_text = id->GetSeqIdString(false) ;
        //   LOG_POST( Info << "CGeneMarkLoader:: accession: " << acc_text );
        //  _TRACE("CGeneMarkLoader:: accession: " << acc_text );

        // File-browser pattern derived from the accession,
        // e.g. "(<ACCESSION>.fna.g*)".
        string tmp_str =  string("(") + NStr::ToUpper(acc_text) + string(".fna.g*)");  //"(*.{gmHMM,gmark,glim_*})"

        // NOTE(review): the start directory is a hard-coded site-specific
        // network path.
        string fname =
            NcbiFileBrowser("Open GeneMark/GeneMark.hmm/Glimmer output file...",
                            tmp_str.c_str(),
#if defined(NCBI_OS_MSWIN)
                            "\\\\Atlas\\b11\\tatiana\\Predictions\\");
#else
                            "/net/atlas/b11/tatiana/Predictions/");
#endif
        if ( fname.empty() ) {
            // nothing was selected
            reply.SetStatus(eMessageStatus_ignored);
            return;
        }

        // The handle itself is not used below; it is only a check that the
        // sequence can be resolved in the document's scope.
        CBioseq_Handle handle = doc->GetScope().GetBioseqHandle(*id);
        if ( !handle ) {
            _TRACE("CGeneMarkLoader::Load: can't get bioseq handle");
            return;
        }

        CRef<CSeq_annot> annot;
        switch(x_RecognizeFormat(fname))
        {
        case fGeneMark:
            annot = x_LoadGeneMarkFile(fname, *id);
            break;
        case fGeneMarkHMM:
            annot = x_LoadGeneMarkHmmFile(fname, *id);
            break;
        case fGlimmer2:
            annot = x_LoadGlimmer2File(fname, *id);
            break;
        default:
            _TRACE("Unknown return code");
            // intentional fall through: handled like an unknown format
        case fUnknownFormat:
            _TRACE("Unsupported file format, file: "<< fname);
            reply.SetStatus(eMessageStatus_failed);
            return;
        }

        // save the object in the reply for framework processing
        reply.AddObject(*doc, *annot);
        reply.AddAction(CPluginReplyAction::e_Add_to_document);
        reply.SetStatus(eMessageStatus_success);
    }
    catch (const CException& e) {
        LOG_POST(Info << "failed to read GeneMark file: " << e.what());
        _TRACE("failed to read GeneMark file: " << e.what() );
    }
#ifndef _DEBUG
    // Release builds swallow everything else; debug builds let it propagate.
    catch (...) {
        _TRACE("failed to read GeneMark file: unknown error");
    }
#endif
}


/// Guess the format of a prediction file from its first lines.
///
///  - fGlimmer2     if the first line contains "r=-1." or "r=-0.";
///  - fGeneMark     if one of the next 29 lines contains both "Strand" and
///                  "Frame";
///  - fGeneMarkHMM  if one of the next 29 lines contains both "Strand" and
///                  "RightEnd" (and not "Frame");
///  - fUnknownFormat otherwise, including when the file cannot be opened or
///                  is empty.
///
/// Lines are read into a string so that an over-long line neither truncates
/// the text being searched nor puts the stream into a failed state.
///
/// @param fname  Path of the file to inspect.
/// @return       The detected format code.
CGeneMarkLoader::EFileFormat
CGeneMarkLoader::x_RecognizeFormat(const string& fname)
{
    CNcbiIfstream istr( fname.c_str() );
    if ( !istr ) {
        return fUnknownFormat;
    }

    string line;
    if ( !getline(istr, line) ) {
        return fUnknownFormat;
    }
    if ( line.find("r=-1.") != string::npos  ||
         line.find("r=-0.") != string::npos ) {
        return fGlimmer2;
    }

    // Only the head of the file (lines 2..30) is examined.
    for (int i = 1;  (i < 30)  &&  getline(istr, line);  ++i)
    {
        if (line.find("Strand") != string::npos)
        {
            if (line.find("Frame")    != string::npos) return fGeneMark;
            if (line.find("RightEnd") != string::npos) return fGeneMarkHMM;
        }
    }
    return fUnknownFormat;
}


/// Load Glimmer2 predictions into a new annotation named
/// "Glimmer2 predictions".
///
/// Each record line is expected to start with three integers (gene number,
/// "from", "to") followed somewhere by a '+' or '-' strand character.  For
/// every accepted record one CDS feature (frame one, genetic code id 11) is
/// appended to the annotation's feature table, located on @a id.  The
/// reported coordinates are extended by 3 on the 3' side ("to + 3" on the
/// plus strand, "to - 3" on the minus strand) and converted from 1-based to
/// 0-based.
///
/// Lines are skipped when
///  - the three leading integers cannot be parsed (blank/header lines),
///  - no strand character is present,
///  - the resulting interval starts before position 1 or is inverted
///    (prediction crossing the zero point).
///
/// @param fname  Path of the Glimmer2 output file.
/// @param id     Seq-id assigned to every feature location.
/// @return       The annotation; its feature table is empty if nothing
///               could be loaded.
CRef<CSeq_annot>
CGeneMarkLoader::x_LoadGlimmer2File(const string&  fname,
                                    const CSeq_id& id)
{
    CNcbiIfstream istr(fname.c_str());
    CRef<CSeq_annot> annot( new CSeq_annot() );
    annot->AddName("Glimmer2 predictions");
    list< CRef<CSeq_feat> >& ftable = annot->SetData().SetFtable();

    string line;
    while ( getline(istr, line) )
    {
        CNcbiIstrstream Lstr( line.c_str() );

        int     gene_number = 0, glim_from = 0, glim_to = 0;
        if ( !(Lstr >> gene_number >> glim_from >> glim_to) ) {
            // not a prediction record (blank line, header, ...)
            continue;
        }

        // Scan forward for the strand sign.  The scan must stop when the
        // line is exhausted: a failed extraction leaves 'strand' unchanged,
        // so testing only the character would loop forever.
        char strand = 'U';
        while ( (strand != '-')  &&  (strand != '+')  &&  (Lstr >> strand) ) {
        }
        if ( (strand != '-')  &&  (strand != '+') ) {
            LOG_POST(Info << "CGeneMarkLoader::Glimmer record without strand marker, skipped.");
            continue;
        }

        // Computed in signed arithmetic: converting a zero or negative
        // left end to the (unsigned) TSeqPos before the "- 1" below would
        // wrap around to a huge coordinate.
        int left_end, right_end;
        if(strand == '-')
        {
            left_end  = glim_to - 3;
            right_end = glim_from;
        }else{                         // direct
            left_end  = glim_from;
            right_end = glim_to + 3;
        }

        if( (left_end < 1)  ||  (left_end > right_end) ) // Glimmer prediction over zero-point
        {
            // can't handle yet
            LOG_POST(Info << "CGeneMarkLoader::Can't handle Glimmer's prediction over zero-point, skipped.");
            continue;
        }

        CRef<CSeq_feat> feat(new CSeq_feat());
        feat->SetComment() = "Glimmer2 pred #" + NStr::IntToString(gene_number) ;

        // 1-based file coordinates -> 0-based interval
        CSeq_interval& floc = feat->SetLocation().SetInt();
        floc.SetFrom(static_cast<TSeqPos>(left_end  - 1));
        floc.SetTo  (static_cast<TSeqPos>(right_end - 1));
        floc.SetStrand((strand == '-') ? eNa_strand_minus : eNa_strand_plus);
        floc.SetId().Assign(id);    // floc.SetId().SetGi( NStr::StringToInt(m_SeqId) );

        CSeqFeatData& fdata = feat->SetData();
        CCdregion& cdreg = fdata.SetCdregion();
        cdreg.SetFrame(CCdregion::eFrame_one);

        // NOTE(review): id 11 is presumably the genetic code (translation
        // table) number - confirm.
        list< CRef< CGenetic_code::C_E > >& gcode = cdreg.SetCode().Set();
        CRef< CGenetic_code::C_E > ce(new CGenetic_code::C_E);
        ce->SetId(11);                                        // TSE=1; seq=1; feat=1
        gcode.push_back(ce);

        ftable.push_back(feat);
    }
    return annot;
}


CRef<CSeq_annot>
CGeneMarkLoader::x_LoadGeneMarkHmmFile(const string& fname,
                                       const CSeq_id& id)
{
    CNcbiIfstream istr(fname.c_str());
    CRef<CSeq_annot> annot( new CSeq_annot() );

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?