find_orfs.cpp
来自「ncbi源码」· C++ 代码 · 共 396 行
CPP
396 行
/* * =========================================================================== * PRODUCTION $Log: find_orfs.cpp,v $ * PRODUCTION Revision 1000.5 2004/06/01 20:54:59 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.26 * PRODUCTION * =========================================================================== *//* $Id: find_orfs.cpp,v 1000.5 2004/06/01 20:54:59 gouriano Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Authors: Josh Cherry * * File Description: simple gbench plugin for finding ORFs * */#include <ncbi_pch.hpp>#include "find_orfs.hpp"#include <algo/sequence/make_cdr_prods.hpp>#include <algo/sequence/orf.hpp>#include <gui/core/plugin_utils.hpp>#include <gui/core/version.hpp>#include <gui/dialogs/col/multi_col_dlg.hpp>#include <gui/plugin/PluginCommandSet.hpp>#include <gui/plugin/PluginInfo.hpp>#include <gui/plugin/PluginReply.hpp>#include <gui/plugin/PluginRequest.hpp>#include <gui/plugin/PluginValueConstraint.hpp>#include <gui/objutils/utils.hpp>#include <objects/seqfeat/Genetic_code.hpp>#include <objects/seqfeat/Genetic_code_table.hpp>#include <objmgr/util/sequence.hpp>BEGIN_NCBI_SCOPEUSING_SCOPE(objects);CAlgoPlugin_FindOrfs::~CAlgoPlugin_FindOrfs(){}// standard plugin announce bopilerplatevoid CAlgoPlugin_FindOrfs::GetInfo(CPluginInfo& info){ info.Reset(); // version info macro info.SetInfo(CPluginVersion::eMajor, CPluginVersion::eMinor, 0, string(__DATE__) + " " + string(__TIME__), "CAlgoPlugin_FindOrfs", "Search/Find Open Reading Frames", "Find open reading frames in a DNA sequence", ""); // command info CPluginCommandSet& cmds = info.SetCommands(); CPluginCommand& args = cmds.AddAlgoCommand(eAlgoCommand_run); args.AddArgument("locs", "Locations to evaluate", CSeq_loc::GetTypeInfo(), CPluginArg::TData::e_Array); args.SetConstraint("locs", (*CPluginValueConstraint::CreateSeqMol(), CSeq_inst::eMol_na, CSeq_inst::eMol_dna, CSeq_inst::eMol_rna)); args.AddDefaultArgument("min_length_codons", "Minimum number of sense codons", CPluginArg::eInteger, "100"); // genetic code argument const CGenetic_code_table& code_table = CGen_code_table::GetCodeTable(); const CGenetic_code_table::Tdata& codes = code_table.Get(); args.AddDefaultArgument("genetic_code", "Genetic code", CPluginArg::eString, codes.front()->GetName()); CPluginValueConstraint *code_list = CPluginValueConstraint::CreateSet(); ITERATE (CGenetic_code_table::Tdata, code, codes) { code_list->SetSet().push_back((*code)->GetName()); } args.SetConstraint("genetic_code", *code_list); }void CAlgoPlugin_FindOrfs::RunCommand(CPluginMessage& msg){ const CPluginCommand& args = msg.GetRequest().GetCommand(); CPluginReply& reply = msg.SetReply(); _TRACE("CAlgoPlugin_FindOrfs::RunCommand()"); if ( !m_Dialog.get() ) { m_Dialog.reset(new CMultiColDlg()); m_Dialog->SetWindowSize(500, 450); m_Dialog->SetTitle("Open Reading Frames"); m_Dialog->SetColumn(0, "Sequence", FL_ALIGN_LEFT, 0.5f); m_Dialog->SetColumn(1, "Location", FL_ALIGN_LEFT, 0.5f); m_Dialog->SetColumn(2, "Strand", FL_ALIGN_CENTER, 0.25f); m_Dialog->SetColumn(3, "From", FL_ALIGN_CENTER, 0.5f); m_Dialog->SetColumn(4, "To", FL_ALIGN_CENTER, 0.5f); m_Dialog->SetColumn(5, "Sense Codons", FL_ALIGN_CENTER, 0.5f); } // clear any previous contents m_Dialog->SetRows(0); int row = 0; plugin_args::TLocList locs; GetArgValue(args["locs"], locs); int min_length_codons = args["min_length_codons"].AsInteger(); string genetic_code_name = args["genetic_code"].AsString(); ITERATE (plugin_args::TLocList, iter, locs) { const CSeq_loc& loc = *iter->second; const IDocument& doc = *iter->first; // find the best ID for this bioseq try { CBioseq_Handle handle = doc.GetScope().GetBioseqHandle(loc); // get sequence vector CSeqVector vec = handle.GetSequenceView(loc, CBioseq_Handle::eViewConstructed, CBioseq_Handle::eCoding_Ncbi); string& id_str = m_Dialog->SetCell(row, 0); string& loc_str = m_Dialog->SetCell(row, 1); const CSeq_id& best_id = sequence::GetId(handle, sequence::eGetId_Best); id_str.erase(); best_id.GetLabel(&id_str); loc_str = CPluginUtils::GetLabel(loc, &doc.GetScope()); // place to store orfs vector< CRef<CSeq_loc> > orfs; // find some ORFs COrf::FindOrfs(vec, orfs, min_length_codons * 3, x_DecodeGeneticCode(genetic_code_name)); // translate our locs to our parent location NON_CONST_ITERATE (vector< CRef<CSeq_loc> >, iter, orfs) { (**iter).SetId(sequence::GetId(loc)); *iter = CSeqUtils::RemapChildToParent(loc, **iter); } // make an annot CRef<CSeq_id> this_id (const_cast<CSeq_id*>(&sequence::GetId(loc))); CRef<CSeq_annot> annot = COrf::MakeCDSAnnot(orfs, x_DecodeGeneticCode(genetic_code_name)); // add description to annot annot->AddName("Open reading frames"); string comment = string("Open reading frames containing at least ") + NStr::IntToString(min_length_codons) + " sense codons using " + genetic_code_name + " genetic code"; annot->AddComment(comment); // make protein sequences CRef<CBioseq_set> product_set = CMakeCdrProds::MakeCdrProds(annot, handle); reply.AddObject(doc, *product_set); reply.AddObject(doc, *annot); /** CRef<CSeq_entry> new_entry(new CSeq_entry); new_entry->SetSet(*product_set); doc.GetScope().AddTopLevelSeqEntry(*new_entry); **/ // attach annot to doc //const_cast<IDocument&>(doc).AttachAnnot(*annot); // in order to build dialog efficiently, // pre-allocate one line for each ORF m_Dialog->SetRows(row + orfs.size()); ITERATE (vector< CRef<CSeq_loc> >, loc_iter, orfs) { const CSeq_loc& orf = **loc_iter; // // add ORFs to dialog // ENa_strand strand = sequence::GetStrand(orf); if (strand == eNa_strand_minus) { m_Dialog->SetCell(row, 2) = "-"; } else { m_Dialog->SetCell(row, 2) = "+"; } m_Dialog->SetCell(row, 3) = NStr::IntToString(orf.GetTotalRange().GetFrom() + 1); m_Dialog->SetCell(row, 4) = NStr::IntToString(orf.GetTotalRange().GetTo() + 1); // ORF may or may not include a stop codon. // If it does, this must be subtracted // in computing the number of sense codons. int sense_codon_count = sequence::GetLength(orf); sense_codon_count /= 3; sense_codon_count -= 1; if ((strand == eNa_strand_plus && orf.IsPartialRight()) || (strand == eNa_strand_minus && orf.IsPartialLeft())) { ++sense_codon_count; } m_Dialog->SetCell(row, 5) = NStr::IntToString(sense_codon_count); ++row; } } catch (CException& e) { LOG_POST(Error << "error processing location in ORF finder: " << e.what()); string str = CPluginUtils::GetLabel(loc, &doc.GetScope()); LOG_POST(Error << "Error processing location " << str); } catch (exception& e) { LOG_POST(Error << "error processing location in ORF finder: " << e.what()); string str = CPluginUtils::GetLabel(loc, &doc.GetScope()); LOG_POST(Error << "Error processing location " << str); }#ifndef _DEBUG catch (...) { string str = CPluginUtils::GetLabel(loc, &doc.GetScope()); LOG_POST(Error << "Error processing location " << str); }#endif } // update all views //CDocManager::UpdateAllViews(); // // prepare our dialog box // m_Dialog->SetLabel(string("ORFs ") + NStr::IntToString(min_length_codons) + " codons or longer" + " using " + genetic_code_name + " genetic code"); m_Dialog->Show(); reply.AddAction(CPluginReplyAction::e_Add_to_document); reply.SetStatus(eMessageStatus_success);}// figure out the id of the genetic code the user wantsint CAlgoPlugin_FindOrfs::x_DecodeGeneticCode(const string& s){ const CGenetic_code_table& code_table = CGen_code_table::GetCodeTable(); const CGenetic_code_table::Tdata& codes = code_table.Get(); ITERATE (CGenetic_code_table::Tdata, code, codes) { if ((*code)->GetName() == s) { return (*code)->GetId(); } } // if we got here, nothing matched NCBI_THROW(CException, eUnknown, "CAlgoPlugin_FindOrfs: no genetic code matched " + s);}END_NCBI_SCOPE/* * =========================================================================== * $Log: find_orfs.cpp,v $ * Revision 1000.5 2004/06/01 20:54:59 gouriano * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.26 * * Revision 1.26 2004/05/21 22:27:46 gorelenk * Added PCH ncbi_pch.hpp * * Revision 1.25 2004/05/03 13:05:42 dicuccio * gui/utils --> gui/objutils where needed * * Revision 1.24 2004/03/05 17:35:07 dicuccio * Use CGenetic_code_table typedefs to ease syntax. Use sequence::GetId() instead * of CSeq_id::GetStringDescr() * * Revision 1.23 2004/01/27 18:37:41 dicuccio * Code clean-up. Use standard names for plugins. Removed unnecessary #includes * * Revision 1.22 2004/01/07 15:50:36 dicuccio * Adjusted for API change in CPluginUtils::GetLabel(). Standardized exception * reporting in algorithms. * * Revision 1.21 2003/11/24 15:45:26 dicuccio * Renamed CVersion to CPluginVersion * * Revision 1.20 2003/11/18 17:48:36 dicuccio * Added standard processing of return values * * Revision 1.19 2003/11/10 16:51:06 jcherry * Added generation of protein sequences for orfs * * Revision 1.18 2003/11/06 20:12:12 dicuccio * Cleaned up handling of USING_SCOPE - removed from all headers * * Revision 1.17 2003/11/04 17:49:22 dicuccio * Changed calling parameters for plugins - pass CPluginMessage instead of paired * CPluginCommand/CPluginReply * * Revision 1.16 2003/10/27 17:46:48 dicuccio * Removed dead #includes * * Revision 1.15 2003/10/15 21:51:11 jcherry * Don't set ids with MakeCDSAnnot; it doesn't work, and it would be * redundant anyway. * * Revision 1.14 2003/10/15 13:40:26 dicuccio * Mkae sure to set the 'id' for the seq-locs before calling RemapChildToParent() * * Revision 1.13 2003/10/14 16:24:37 dicuccio * Correctly remap new feature locations through the parent location to the master * sequence * * Revision 1.12 2003/10/07 13:47:00 dicuccio * Renamed CPluginURL* to CPluginValue* * * Revision 1.11 2003/09/30 13:40:49 dicuccio * Minor code clean-up: use container typedefs from ASN.1 generated classes * * Revision 1.10 2003/09/25 17:21:35 jcherry * Added name to annot * * Revision 1.9 2003/09/04 19:27:53 jcherry * Made an ORF include the stop codon, and marked certain ORFs as * partial. Put ability to construct a feature table into COrf. * * Revision 1.8 2003/09/04 14:05:24 dicuccio * Use IDocument instead of CDocument * * Revision 1.7 2003/09/03 14:46:53 rsmith * change namespace name from args to plugin_args to avoid clashes with variable names. * * Revision 1.6 2003/08/21 12:03:07 dicuccio * Make use of new typedef in plugin_utils.hpp for argument values. * * Revision 1.5 2003/08/19 20:47:52 jcherry * Use SetSet().pushback() rather than comma operator for adding * constraints in loop (less bizarre-looking) * * Revision 1.4 2003/08/19 18:36:59 jcherry * Allowed user to specify genetic code * * Revision 1.3 2003/08/18 19:24:15 jcherry * Moved orf and seq_match to algo/sequence * * Revision 1.2 2003/08/18 18:01:58 jcherry * Changed COrf::FindOrfs to produce a vector of CRef<CSeq_loc>. * Added version of FindOrfs that takes a CSeqVector. * * Revision 1.1 2003/08/14 17:59:22 jcherry * Initial version * * =========================================================================== */
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?