genemark_loader.cpp

来自「ncbi源码」· C++ 代码 · 共 564 行 · 第 1/2 页

CPP
564
字号
    annot->AddName("GeneMark.hmm predictions");    list< CRef<CSeq_feat> >& ftable = annot->SetData().SetFtable();     char buf[ GENEMARK_MAXLINE +1 ];    while( istr.getline(buf, GENEMARK_MAXLINE) )  // find and skip file header    {        if(strstr(buf, "Strand") && strstr(buf,"RightEnd"))        {            istr.getline(buf, GENEMARK_MAXLINE); // skip another one line            break;        }    }    while( istr.getline(buf, GENEMARK_MAXLINE) )    {        CNcbiIstrstream Lstr( buf );      // LOG_POST(Info << "Parsing line:" << buf);        int     gene_number = 0;        Lstr >> gene_number;        char strand = 'U';        for(; (strand != '-') && (strand != '+'); Lstr >> strand);        string aLeftEnd;        TSeqPos LeftEnd, RightEnd;        Lstr >> aLeftEnd;        Lstr >>  RightEnd;        if(aLeftEnd[0] == '<') aLeftEnd[0] = ' ';         LeftEnd = NStr::StringToInt(aLeftEnd);        //  _TRACE("parsed line: "<< gene_number <<" from: "<< LeftEnd <<" to: "<< RightEnd <<" starnd: "<< strand);        CRef<CSeq_feat> feat(new CSeq_feat);        feat->SetComment() = "GeneMark.hmm pred #" + NStr::IntToString(gene_number) ;        CSeq_interval& floc = feat->SetLocation().SetInt();        floc.SetFrom(LeftEnd- 1);        floc.SetTo(RightEnd - 1);         floc.SetStrand((strand == '-') ? eNa_strand_minus : eNa_strand_plus);        floc.SetId().Assign(id);    // floc.SetId().SetGi( NStr::StringToInt(m_SeqId) );        CSeqFeatData& fdata = feat->SetData();        CCdregion& cdreg = fdata.SetCdregion();        cdreg.SetFrame(CCdregion::eFrame_one);        list< CRef< CGenetic_code::C_E > >& gcode = cdreg.SetCode().Set();        CRef< CGenetic_code::C_E > ce(new CGenetic_code::C_E);        ce->SetId(11);                                        // TSE=1; seq=1; feat=1        gcode.push_back(ce);        ftable.push_back(feat);    }    return annot;}CRef<CSeq_annot>CGeneMarkLoader::x_LoadGeneMarkFile(const string& fname, const CSeq_id& id){    char buf[ GENEMARK_MAXLINE +1 ];    CNcbiIfstream istr(fname.c_str());    while(istr.getline(buf, GENEMARK_MAXLINE))  // find and skip file header    {        if(strstr(buf, "Strand") && strstr(buf,"Frame"))        {            istr.getline(buf, GENEMARK_MAXLINE); // skip  ----- -----            // istr.getline(buf, GENEMARK_MAXLINE); // sometimes NOT empty !            break;        }    }    CRef<CSeq_annot> annot( new CSeq_annot() );    annot->AddName("GeneMark predictions");    list< CRef<CSeq_feat> >& ftable = annot->SetData().SetFtable();    TSeqPos prevStop = 0;    int gene_number  = 0;    CRef<CSeq_feat> feat;    CSeq_interval *floc;    while( istr.getline(buf, GENEMARK_MAXLINE) )    {        if(strstr(buf, "interest")) break; // we have reached "List of Regions of interest"        if(strlen(buf) < 10) continue;        // LOG_POST(Info << "CGeneMarkLoader::Load: parsing line: " << buf );        CNcbiIstrstream Lstr( buf );        string  strand;        TSeqPos LeftEnd, RightEnd, curStop;        Lstr >> LeftEnd;        Lstr >> RightEnd;        Lstr >> strand;        bool  is_complementary = (strand.find("complement") != string::npos);        curStop = is_complementary ? LeftEnd : RightEnd;        if(curStop != prevStop)  //  new lines-group        {            prevStop = curStop;            ++gene_number;            feat = new CSeq_feat();            feat->SetComment() = "GeneMark pred #" + NStr::IntToString(gene_number) ;            ftable.push_back(feat);            CSeqFeatData& fdata = feat->SetData();            CCdregion& cdreg = fdata.SetCdregion();            cdreg.SetFrame(CCdregion::eFrame_one);            list< CRef< CGenetic_code::C_E > >& gcode = cdreg.SetCode().Set();            CRef< CGenetic_code::C_E > ce(new CGenetic_code::C_E);            ce->SetId(11);                                        // TSE=1; seq=1; feat=1            gcode.push_back(ce);            floc = & feat->SetLocation().SetInt();            floc->SetFrom(LeftEnd-1);            floc->SetTo(RightEnd -1);             floc->SetStrand(is_complementary ? eNa_strand_minus : eNa_strand_plus);            floc->SetId().Assign(id);   // floc->SetId().SetGi( NStr::StringToInt(m_SeqId) );        }        else    // another start of previous gene        {                if(is_complementary)            {                floc->SetFuzz_to().SetAlt().push_back( RightEnd -1);            }            else            {                floc->SetFuzz_from().SetAlt().push_back( LeftEnd -1); // LOG_POST(Info << "Left:"<<LeftEnd);            }          }    }    return annot;}END_NCBI_SCOPE/* * ===================================================================== * $Log: genemark_loader.cpp,v $ * Revision 1000.5  2004/06/01 20:58:40  gouriano * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.35 * * Revision 1.35  2004/05/21 22:27:48  gorelenk * Added PCH ncbi_pch.hpp * * Revision 1.34  2004/03/11 17:44:00  dicuccio * Use new file loader dialog * * Revision 1.33  2003/12/22 20:29:47  dernovoy * skip Glimmer's prediction over zero-point * * Revision 1.32  2003/12/16 20:24:55  dernovoy * Path to precomputed predictions should work on both Windows and UNIX inside NCBI * * Revision 1.31  2003/12/10 22:53:24  dernovoy * defaults directory and file extensions added * * Revision 1.30  2003/12/09 23:22:14  dernovoy * menu string changed : coma instead of slash * * Revision 1.29  2003/12/09 21:46:00  dernovoy * Glimmer2 output loader was added * * Revision 1.28  2003/11/24 15:45:40  dicuccio * Renamed CVersion to CPluginVersion * * Revision 1.27  2003/11/18 17:49:26  dicuccio * Added standard processing of return values * * Revision 1.26  2003/11/04 17:49:25  dicuccio * Changed calling parameters for plugins - pass CPluginMessage instead of paired * CPluginCommand/CPluginReply * * Revision 1.25  2003/10/10 17:19:33  dicuccio * Added Import() interface.  Removed dead Save() interfaces * * Revision 1.24  2003/10/07 13:47:06  dicuccio * Renamed CPluginURL* to CPluginValue* * * Revision 1.23  2003/09/17 16:27:28  dicuccio * Removed load command * * Revision 1.22  2003/09/04 14:51:59  dicuccio * Use IDocument instead of CDocument * * Revision 1.21  2003/07/14 11:17:25  shomrat * Plugin messageing system related changes * * Revision 1.20  2003/06/25 17:02:59  dicuccio * Split CPluginHandle into a handle (pointer-to-implementation) and * implementation file.  Lots of #include file clean-ups. * * Revision 1.19  2003/06/20 14:52:58  dicuccio * Revised plugin registration - moved GetInfo() into the plugin handler * * Revision 1.18  2003/05/19 13:40:45  dicuccio * Moved gui/core/plugin/ -> gui/plugin/.  Merged core libraries into libgui_core. * Removed old, unused dialog box. * * Revision 1.17  2003/04/24 16:39:29  dicuccio * Updated to reflect changes in plugin API * * Revision 1.16  2003/02/24 13:03:16  dicuccio * Renamed classes in plugin spec: *     CArgSeg --> CPluginArgSet *     CArgument --> CPluginArg *     CPluginArgs --> CPluginCommand *     CPluginCommands --> CPluginCommandSet * * Revision 1.15  2003/02/20 19:49:56  dicuccio * Created new plugin architecture, based on ASN.1 spec.  Moved GBENCH frameowrk * over to use new plugin architecture. * * Revision 1.14  2003/02/06 18:48:36  dicuccio * Made 'catch (...)' conditional for non-debug builds * * Revision 1.13  2003/01/15 19:47:37  dernovoy* accession can be used for seq_id in features (was: only gi).** Revision 1.12  2003/01/13 13:10:07  dicuccio* Namespace clean-up.  Retired namespace gui -> converted all to namespace ncbi.* Moved all FLUID-generated code into namespace ncbi.** Revision 1.11  2003/01/08 21:17:59  dernovoy* Feature's location fixed (from local to gi),* GeneMark coordinates translated to ncbi format (0 - (Len-1))    *    * Revision 1.10  2003/01/06 21:03:12  dernovoy    * Support for Alternative starts (fuzz-from/to) of GeneMark's output    *    * Revision 1.9  2003/01/02 21:41:37  dernovoy    * Load of genemark output added, the farthest starts taken for features    *    * Revision 1.8  2003/01/02 19:58:38  dernovoy    * fix comparing stream with int    *    * Revision 1.7  2002/12/31 19:45:20  dernovoy    * Addded support for genemarkHMM start positions started with symbol '<'    *    * Revision 1.6  2002/12/30 17:48:29  dicuccio* Added mechanism for data loader plugins to announce supported modes of* operation (load, import, save currently)    *    * Revision 1.5  2002/12/30 15:47:10  dernovoy    * TRACE outputs added, any doc's updates comments out    *    * Revision 1.4  2002/12/26 20:50:25  dernovoy    * *** empty log message ***    *    * Revision 1.3  2002/12/26 20:48:59  dernovoy    * Log output    ** Revision 1.2  2002/12/26 17:45:55  dicuccio* Reformatted code (reindent)    *    * Revision 1.1  2002/12/26 17:12:38  dernovoy    * Initial revision.    *    * =====================================================================    */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?