find_orfs.cpp

来自「ncbi源码」· C++ 代码 · 共 396 行

CPP
396
字号
/*
 * ===========================================================================
 * PRODUCTION $Log: find_orfs.cpp,v $
 * PRODUCTION Revision 1000.5  2004/06/01 20:54:59  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.26
 * PRODUCTION
 * ===========================================================================
 */

/*  $Id: find_orfs.cpp,v 1000.5 2004/06/01 20:54:59 gouriano Exp $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Authors:  Josh Cherry
 *
 * File Description:  simple gbench plugin for finding ORFs
 *
 */

#include <ncbi_pch.hpp>
#include "find_orfs.hpp"
#include <algo/sequence/make_cdr_prods.hpp>
#include <algo/sequence/orf.hpp>
#include <gui/core/plugin_utils.hpp>
#include <gui/core/version.hpp>
#include <gui/dialogs/col/multi_col_dlg.hpp>
#include <gui/plugin/PluginCommandSet.hpp>
#include <gui/plugin/PluginInfo.hpp>
#include <gui/plugin/PluginReply.hpp>
#include <gui/plugin/PluginRequest.hpp>
#include <gui/plugin/PluginValueConstraint.hpp>
#include <gui/objutils/utils.hpp>
#include <objects/seqfeat/Genetic_code.hpp>
#include <objects/seqfeat/Genetic_code_table.hpp>
#include <objmgr/util/sequence.hpp>


BEGIN_NCBI_SCOPE
USING_SCOPE(objects);


CAlgoPlugin_FindOrfs::~CAlgoPlugin_FindOrfs()
{
}


// standard plugin announce boilerplate
//
// Fills 'info' with this plugin's version/description strings and declares
// the arguments of its single "run" command:
//   locs               - array of CSeq_loc, constrained by molecule type
//   min_length_codons  - integer, default "100"
//   genetic_code       - string, default is the name of the first entry of
//                        the genetic code table; constrained to the set of
//                        all names in that table
void CAlgoPlugin_FindOrfs::GetInfo(CPluginInfo& info)
{
    info.Reset();

    // version info macro
    info.SetInfo(CPluginVersion::eMajor, CPluginVersion::eMinor, 0,
                 string(__DATE__) + " " + string(__TIME__),
                 "CAlgoPlugin_FindOrfs", "Search/Find Open Reading Frames",
                 "Find open reading frames in a DNA sequence",
                 "");

    // command info
    CPluginCommandSet& cmds = info.SetCommands();
    CPluginCommand&    args = cmds.AddAlgoCommand(eAlgoCommand_run);
    args.AddArgument("locs", "Locations to evaluate",
                     CSeq_loc::GetTypeInfo(),
                     CPluginArg::TData::e_Array);
    // the parenthesized comma expression is a single argument: the
    // molecule types are chained onto the constraint object
    args.SetConstraint("locs",
                       (*CPluginValueConstraint::CreateSeqMol(),
                        CSeq_inst::eMol_na,
                        CSeq_inst::eMol_dna,
                        CSeq_inst::eMol_rna));

    args.AddDefaultArgument("min_length_codons",
                            "Minimum number of sense codons",
                            CPluginArg::eInteger, "100");

    // genetic code argument
    const CGenetic_code_table& code_table = CGen_code_table::GetCodeTable();
    const CGenetic_code_table::Tdata& codes = code_table.Get();
    args.AddDefaultArgument("genetic_code", "Genetic code",
                            CPluginArg::eString, codes.front()->GetName());

    // NOTE(review): presumably SetConstraint() takes over the lifetime of
    // the object returned by CreateSet() -- confirm against its declaration
    CPluginValueConstraint *code_list = CPluginValueConstraint::CreateSet();
    ITERATE (CGenetic_code_table::Tdata, code, codes) {
        code_list->SetSet().push_back((*code)->GetName());
    }
    args.SetConstraint("genetic_code", *code_list);
}


// Run the ORF finder on every location passed in the request.
//
// For each location this finds ORFs of at least 'min_length_codons' codons
// using the requested genetic code, remaps them onto the parent location,
// builds a CDS annotation plus the corresponding protein products, adds both
// to the reply, and appends one dialog row per ORF.
//
// A failure while processing one location is logged and does not stop the
// processing of the remaining locations.  The dialog is shown and the reply
// is marked successful regardless of per-location failures.
void CAlgoPlugin_FindOrfs::RunCommand(CPluginMessage& msg)
{
    const CPluginCommand& args = msg.GetRequest().GetCommand();
    CPluginReply& reply = msg.SetReply();

    _TRACE("CAlgoPlugin_FindOrfs::RunCommand()");

    // the dialog is created on first use and re-used afterwards
    if ( !m_Dialog.get() ) {
        m_Dialog.reset(new CMultiColDlg());
        m_Dialog->SetWindowSize(500, 450);
        m_Dialog->SetTitle("Open Reading Frames");

        m_Dialog->SetColumn(0, "Sequence", FL_ALIGN_LEFT, 0.5f);
        m_Dialog->SetColumn(1, "Location", FL_ALIGN_LEFT, 0.5f);
        m_Dialog->SetColumn(2, "Strand", FL_ALIGN_CENTER, 0.25f);
        m_Dialog->SetColumn(3, "From", FL_ALIGN_CENTER, 0.5f);
        m_Dialog->SetColumn(4, "To", FL_ALIGN_CENTER, 0.5f);
        m_Dialog->SetColumn(5, "Sense Codons", FL_ALIGN_CENTER, 0.5f);
    }

    // clear any previous contents
    m_Dialog->SetRows(0);

    // index of the next dialog row to fill; runs across all locations
    int row = 0;

    plugin_args::TLocList locs;
    GetArgValue(args["locs"], locs);

    int min_length_codons = args["min_length_codons"].AsInteger();
    string genetic_code_name = args["genetic_code"].AsString();

    ITERATE (plugin_args::TLocList, iter, locs) {
        // declared outside the try block so the handlers can label the
        // location that failed
        const CSeq_loc&  loc = *iter->second;
        const IDocument& doc = *iter->first;

        // find the best ID for this bioseq
        try {
            CBioseq_Handle handle = doc.GetScope().GetBioseqHandle(loc);

            // get sequence vector
            CSeqVector vec =
                handle.GetSequenceView(loc,
                                       CBioseq_Handle::eViewConstructed,
                                       CBioseq_Handle::eCoding_Ncbi);

            // sequence and location labels go on the first row written
            // for this location
            string& id_str  = m_Dialog->SetCell(row, 0);
            string& loc_str = m_Dialog->SetCell(row, 1);

            const CSeq_id& best_id =
                sequence::GetId(handle, sequence::eGetId_Best);
            id_str.erase();
            best_id.GetLabel(&id_str);
            loc_str = CPluginUtils::GetLabel(loc, &doc.GetScope());

            // resolve the genetic code name once; the id is needed both
            // for finding the ORFs and for building the annotation
            const int genetic_code_id =
                x_DecodeGeneticCode(genetic_code_name);

            // place to store orfs
            vector< CRef<CSeq_loc> > orfs;

            // find some ORFs (minimum length is passed in bases)
            COrf::FindOrfs(vec, orfs,
                           min_length_codons * 3,
                           genetic_code_id);

            // translate our locs to our parent location;
            // the id must be set on each ORF before it is remapped
            const CSeq_id& loc_id = sequence::GetId(loc);
            NON_CONST_ITERATE (vector< CRef<CSeq_loc> >, orf_iter, orfs) {
                (**orf_iter).SetId(loc_id);
                *orf_iter = CSeqUtils::RemapChildToParent(loc, **orf_iter);
            }

            // make an annot
            CRef<CSeq_annot> annot =
                COrf::MakeCDSAnnot(orfs, genetic_code_id);

            // add description to annot
            annot->AddName("Open reading frames");
            string comment =
                string("Open reading frames containing at least ") +
                NStr::IntToString(min_length_codons) +
                " sense codons using " + genetic_code_name +
                " genetic code";
            annot->AddComment(comment);

            // make protein sequences
            CRef<CBioseq_set> product_set =
                CMakeCdrProds::MakeCdrProds(annot, handle);

            reply.AddObject(doc, *product_set);
            reply.AddObject(doc, *annot);

            /**
            CRef<CSeq_entry> new_entry(new CSeq_entry);
            new_entry->SetSet(*product_set);
            doc.GetScope().AddTopLevelSeqEntry(*new_entry);
            **/

            // attach annot to doc
            //const_cast<IDocument&>(doc).AttachAnnot(*annot);

            // in order to build dialog efficiently,
            // pre-allocate one line for each ORF
            m_Dialog->SetRows(row + orfs.size());

            ITERATE (vector< CRef<CSeq_loc> >, loc_iter, orfs) {
                const CSeq_loc& orf = **loc_iter;

                //
                // add ORFs to dialog
                //
                ENa_strand strand = sequence::GetStrand(orf);
                if (strand == eNa_strand_minus) {
                    m_Dialog->SetCell(row, 2) = "-";
                } else {
                    m_Dialog->SetCell(row, 2) = "+";
                }

                // positions are displayed 1-based
                m_Dialog->SetCell(row, 3)
                    = NStr::IntToString(orf.GetTotalRange().GetFrom() + 1);
                m_Dialog->SetCell(row, 4)
                    = NStr::IntToString(orf.GetTotalRange().GetTo() + 1);

                // ORF may or may not include a stop codon.
                // If it does, this must be subtracted
                // in computing the number of sense codons.
                int sense_codon_count = sequence::GetLength(orf);
                sense_codon_count /= 3;
                sense_codon_count -= 1;
                // an ORF that is partial at its downstream end (right end
                // on the plus strand, left end on the minus strand) gets
                // the subtracted codon added back
                if ((strand == eNa_strand_plus   &&  orf.IsPartialRight())  ||
                    (strand == eNa_strand_minus  &&  orf.IsPartialLeft())) {
                    ++sense_codon_count;
                }
                m_Dialog->SetCell(row, 5)
                    = NStr::IntToString(sense_codon_count);

                ++row;
            }
        }
        catch (const CException& e) {
            LOG_POST(Error << "error processing location in ORF finder: "
                     << e.what());
            string str = CPluginUtils::GetLabel(loc, &doc.GetScope());
            LOG_POST(Error << "Error processing location " << str);
        }
        catch (const exception& e) {
            LOG_POST(Error << "error processing location in ORF finder: "
                     << e.what());
            string str = CPluginUtils::GetLabel(loc, &doc.GetScope());
            LOG_POST(Error << "Error processing location " << str);
        }
#ifndef _DEBUG
        // only when _DEBUG is not defined: swallow unknown exceptions too
        catch (...) {
            string str = CPluginUtils::GetLabel(loc, &doc.GetScope());
            LOG_POST(Error << "Error processing location " << str);
        }
#endif
    }

    // update all views
    //CDocManager::UpdateAllViews();

    //
    // prepare our dialog box
    //
    m_Dialog->SetLabel(string("ORFs ") + NStr::IntToString(min_length_codons)
                       + " codons or longer"
                       + " using " + genetic_code_name + " genetic code");
    m_Dialog->Show();

    reply.AddAction(CPluginReplyAction::e_Add_to_document);
    reply.SetStatus(eMessageStatus_success);
}


// figure out the id of the genetic code the user wants
//
// Scans the genetic code table for an entry whose name equals 's' and
// returns that entry's id.  Throws CException (eUnknown) when no entry
// matches.
int CAlgoPlugin_FindOrfs::x_DecodeGeneticCode(const string& s)
{
    const CGenetic_code_table& code_table = CGen_code_table::GetCodeTable();
    const CGenetic_code_table::Tdata& codes = code_table.Get();

    ITERATE (CGenetic_code_table::Tdata, code, codes) {
        if ((*code)->GetName() == s) {
            return (*code)->GetId();
        }
    }

    // if we got here, nothing matched
    NCBI_THROW(CException, eUnknown,
               "CAlgoPlugin_FindOrfs: no genetic code matched " + s);
}


END_NCBI_SCOPE

/*
 * ===========================================================================
 * $Log: find_orfs.cpp,v $
 * Revision 1000.5  2004/06/01 20:54:59  gouriano
 * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.26
 *
 * Revision 1.26  2004/05/21 22:27:46  gorelenk
 * Added PCH ncbi_pch.hpp
 *
 * Revision 1.25  2004/05/03 13:05:42  dicuccio
 * gui/utils --> gui/objutils where needed
 *
 * Revision 1.24  2004/03/05 17:35:07  dicuccio
 * Use CGenetic_code_table typedefs to ease syntax.  Use sequence::GetId() instead
 * of CSeq_id::GetStringDescr()
 *
 * Revision 1.23  2004/01/27 18:37:41  dicuccio
 * Code clean-up.  Use standard names for plugins.  Removed unnecessary #includes
 *
 * Revision 1.22  2004/01/07 15:50:36  dicuccio
 * Adjusted for API change in CPluginUtils::GetLabel().  Standardized exception
 * reporting in algorithms.
 *
 * Revision 1.21  2003/11/24 15:45:26  dicuccio
 * Renamed CVersion to CPluginVersion
 *
 * Revision 1.20  2003/11/18 17:48:36  dicuccio
 * Added standard processing of return values
 *
 * Revision 1.19  2003/11/10 16:51:06  jcherry
 * Added generation of protein sequences for orfs
 *
 * Revision 1.18  2003/11/06 20:12:12  dicuccio
 * Cleaned up handling of USING_SCOPE - removed from all headers
 *
 * Revision 1.17  2003/11/04 17:49:22  dicuccio
 * Changed calling parameters for plugins - pass CPluginMessage instead of paired
 * CPluginCommand/CPluginReply
 *
 * Revision 1.16  2003/10/27 17:46:48  dicuccio
 * Removed dead #includes
 *
 * Revision 1.15  2003/10/15 21:51:11  jcherry
 * Don't set ids with MakeCDSAnnot; it doesn't work, and it would be
 * redundant anyway.
 *
 * Revision 1.14  2003/10/15 13:40:26  dicuccio
 * Mkae sure to set the 'id' for the seq-locs before calling RemapChildToParent()
 *
 * Revision 1.13  2003/10/14 16:24:37  dicuccio
 * Correctly remap new feature locations through the parent location to the master
 * sequence
 *
 * Revision 1.12  2003/10/07 13:47:00  dicuccio
 * Renamed CPluginURL* to CPluginValue*
 *
 * Revision 1.11  2003/09/30 13:40:49  dicuccio
 * Minor code clean-up: use container typedefs from ASN.1 generated classes
 *
 * Revision 1.10  2003/09/25 17:21:35  jcherry
 * Added name to annot
 *
 * Revision 1.9  2003/09/04 19:27:53  jcherry
 * Made an ORF include the stop codon, and marked certain ORFs as
 * partial.  Put ability to construct a feature table into COrf.
 *
 * Revision 1.8  2003/09/04 14:05:24  dicuccio
 * Use IDocument instead of CDocument
 *
 * Revision 1.7  2003/09/03 14:46:53  rsmith
 * change namespace name from args to plugin_args to avoid clashes with variable names.
 *
 * Revision 1.6  2003/08/21 12:03:07  dicuccio
 * Make use of new typedef in plugin_utils.hpp for argument values.
 *
 * Revision 1.5  2003/08/19 20:47:52  jcherry
 * Use SetSet().pushback() rather than comma operator for adding
 * constraints in loop (less bizarre-looking)
 *
 * Revision 1.4  2003/08/19 18:36:59  jcherry
 * Allowed user to specify genetic code
 *
 * Revision 1.3  2003/08/18 19:24:15  jcherry
 * Moved orf and seq_match to algo/sequence
 *
 * Revision 1.2  2003/08/18 18:01:58  jcherry
 * Changed COrf::FindOrfs to produce a vector of CRef<CSeq_loc>.
 * Added version of FindOrfs that takes a CSeqVector.
 *
 * Revision 1.1  2003/08/14 17:59:22  jcherry
 * Initial version
 *
 * ===========================================================================
 */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?