nmer_repeats.cpp
来自「ncbi源码」· C++ 代码 · 共 291 行
CPP
291 行
/*
 * ===========================================================================
 * PRODUCTION $Log: nmer_repeats.cpp,v $
 * PRODUCTION Revision 1000.1  2004/06/01 20:55:10  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.6
 * PRODUCTION
 * ===========================================================================
 */

/*  $Id: nmer_repeats.cpp,v 1000.1 2004/06/01 20:55:10 gouriano Exp $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Authors:  Josh Cherry
 *
 * File Description:  gbench plugin for finding nmer repeats
 *
 */

#include <ncbi_pch.hpp>
#include "nmer_repeats.hpp"
#include <algo/sequence/find_pattern.hpp>
#include <gui/core/plugin_utils.hpp>
#include <gui/core/version.hpp>
#include <gui/dialogs/col/multi_col_dlg.hpp>
#include <gui/plugin/PluginCommandSet.hpp>
#include <gui/plugin/PluginInfo.hpp>
#include <gui/plugin/PluginReply.hpp>
#include <gui/plugin/PluginRequest.hpp>
#include <gui/plugin/PluginValueConstraint.hpp>
#include <gui/objutils/utils.hpp>

#include <objects/seqloc/Seq_interval.hpp>
#include <objmgr/seq_vector.hpp>
#include <objmgr/util/sequence.hpp>


BEGIN_NCBI_SCOPE
USING_SCOPE(objects);


/// Tell whether a candidate repeat unit should be dropped from the results.
///
/// A unit is dropped when every base in it is the same (a monomer run), or
/// when it is a 4-mer whose second half repeats its first half (i.e. it is
/// really a dinucleotide repeated twice).  An empty unit is also dropped.
///
/// @param unit  the first period of a repeat, as cut out of the sequence
/// @return      true if the unit is a monomer run, a doubled dimer, or empty
static bool s_IsDegenerateUnit(const string& unit)
{
    if (unit.empty()) {
        return true;
    }

    // no base after the first differs from the first: monomer repeat
    if (unit.find_first_not_of(unit[0], 1) == string::npos) {
        return true;
    }

    // a tetramer of the form XYXY is a dimer repeat too; the length is
    // checked before indexing so a truncated unit is never read out of range
    return unit.size() == 4  &&
           unit[2] == unit[0]  &&
           unit[3] == unit[1];
}


CAlgoPlugin_NmerRepeats::~CAlgoPlugin_NmerRepeats()
{
}


/// Standard plugin announce boilerplate.
///
/// Resets @a info, fills in the version / class name / menu entry / tooltip,
/// and registers the single "run" command with its arguments:
///   - "locs"          array of CSeq_loc, constrained to nucleotide molecules
///   - "dinuc_min"     integer, default "5"
///   - "trinuc_min"    integer, default "5"
///   - "tetranuc_min"  integer, default "5"
///
/// @param info  plugin description object to populate
void CAlgoPlugin_NmerRepeats::GetInfo(CPluginInfo& info)
{
    info.Reset();

    // version info macro
    info.SetInfo(CPluginVersion::eMajor, CPluginVersion::eMinor, 0,
                 string(__DATE__) + " " + string(__TIME__),
                 "CAlgoPlugin_NmerRepeats",
                 "Search/Find n-mer repeats",
                 "Find di-, tri-, and tetra-nucleotide repeats",
                 "");

    // command info
    CPluginCommandSet& cmds = info.SetCommands();
    CPluginCommand&    args = cmds.AddAlgoCommand(eAlgoCommand_run);

    args.AddArgument("locs", "Locations to evaluate",
                     CSeq_loc::GetTypeInfo(),
                     CPluginArg::TData::e_Array);
    // the parenthesised comma expression is a single argument: the
    // constraint object followed by the molecule types chained onto it
    args.SetConstraint("locs",
                       (*CPluginValueConstraint::CreateSeqMol(),
                        CSeq_inst::eMol_na,
                        CSeq_inst::eMol_dna,
                        CSeq_inst::eMol_rna));

    // NOTE(review): the descriptions below spell "nuclotide" (sic); they are
    // runtime strings and are deliberately left unchanged here.
    args.AddDefaultArgument("dinuc_min",
                            "Minimum number of dinuclotide units",
                            CPluginArg::eInteger, "5");
    args.AddDefaultArgument("trinuc_min",
                            "Minimum number of trinuclotide units",
                            CPluginArg::eInteger, "5");
    args.AddDefaultArgument("tetranuc_min",
                            "Minimum number of tetranuclotide units",
                            CPluginArg::eInteger, "5");
}


/// Run the n-mer repeat search over every requested location.
///
/// For each location the sequence is fetched as IUPAC text and searched for
/// repeats of period 2, 3 and 4 with CFindPattern::FindNucNmerRepeats().
/// Every repeat that survives s_IsDegenerateUnit() produces one row in the
/// results dialog and one region feature ("Repeat: (unit)count") in a
/// per-location annotation named "n-mer Repeats", which is added to the reply.
///
/// Failures while processing one location are logged and do not stop the
/// remaining locations from being processed.  The reply status is always set
/// to success at the end.
///
/// @param msg  plugin message carrying the request arguments; its reply is
///             filled in by this call
void CAlgoPlugin_NmerRepeats::RunCommand(CPluginMessage& msg)
{
    const CPluginCommand& args  = msg.GetRequest().GetCommand();
    CPluginReply&         reply = msg.SetReply();

    _TRACE("CAlgoPlugin_NmerRepeats::Run()");

    // minima[n] is the minimum unit count for period n; slots 0 and 1 are
    // unused so that the vector can be indexed directly by the period
    vector<int> minima(5);
    minima[2] = args["dinuc_min"].AsInteger();
    minima[3] = args["trinuc_min"].AsInteger();
    minima[4] = args["tetranuc_min"].AsInteger();

    // the dialog is created once and reused on later invocations
    if ( !m_Dialog.get() ) {
        m_Dialog.reset(new CMultiColDlg());
        m_Dialog->SetWindowSize(600, 350);
        m_Dialog->SetTitle("n-mer Nucleotide Repeats");
        m_Dialog->SetLabel("Search results:");
        m_Dialog->SetColumn(0, "Sequence");
        m_Dialog->SetColumn(1, "Location");
        m_Dialog->SetColumn(2, "Position", FL_ALIGN_CENTER, 2.0);
        m_Dialog->SetColumn(3, "Repeat", FL_ALIGN_LEFT);
    }

    // clear any previous contents
    m_Dialog->SetRows(0);

    // scratch buffers, cleared and refilled for every (location, period) pair
    vector<TSeqPos> starts;
    vector<TSeqPos> ends;

    // next dialog row to be written
    int row = 0;

    plugin_args::TLocList locs;
    GetArgValue(args["locs"], locs);

    ITERATE (plugin_args::TLocList, iter, locs) {
        const CSeq_loc&  loc = *iter->second;
        const IDocument& doc = *iter->first;

        try {
            CBioseq_Handle handle = doc.GetScope().GetBioseqHandle(loc);
            CSeqVector vec =
                handle.GetSequenceView(loc,
                                       CBioseq_Handle::eViewConstructed,
                                       CBioseq_Handle::eCoding_Iupac);

            string seq;
            vec.GetSeqData( (TSeqPos) 0, vec.size(), seq );

            // sequence / location labels go on the first row belonging to
            // this location; if nothing is found for it, the same row is
            // overwritten by the next location (or trimmed at the end)
            string& id_str  = m_Dialog->SetCell(row, 0);
            string& loc_str = m_Dialog->SetCell(row, 1);

            // find the best ID for this bioseq
            const CSeq_id& best_id =
                sequence::GetId(handle, sequence::eGetId_Best);
            id_str.erase();
            best_id.GetLabel(&id_str);

            loc_str = CPluginUtils::GetLabel(loc, &doc.GetScope());

            CRef<CSeq_annot> annot(new CSeq_annot());

            for (unsigned int n = 2;  n <= 4;  ++n) {
                starts.clear();
                ends.clear();
                CFindPattern::FindNucNmerRepeats(seq, n, minima[n],
                                                 starts, ends);

                // preallocate rows in dialog for speed
                m_Dialog->SetRows(row + starts.size());

                for (size_t k = 0;  k < starts.size();  ++k) {
                    const string rep_unit = seq.substr(starts[k], n);

                    // skip monomer repeats, and tetramers that are really
                    // dimer repeats
                    if (s_IsDegenerateUnit(rep_unit)) {
                        continue;
                    }

                    // ends[k] is inclusive, hence the + 1
                    const int rep_num = (ends[k] - starts[k] + 1) / n;
                    const string rep_summary =
                        "(" + rep_unit + ")" + NStr::IntToString(rep_num);

                    // 1-based indexing for dialog
                    string& pos_str = m_Dialog->SetCell(row, 2);
                    pos_str = NStr::IntToString(starts[k] + 1) + " - "
                        + NStr::IntToString(ends[k] + 1);

                    m_Dialog->SetCell(row, 3) = rep_summary;
                    ++row;

                    // create feature
                    CRef<CSeq_feat> feat(new CSeq_feat());

                    // set correct location: build the interval in the
                    // coordinates of the searched string, then replace it
                    // with the result of remapping it through loc
                    {{
                        CSeq_loc& floc = feat->SetLocation();
                        floc.SetInt().SetId().Assign(sequence::GetId(loc));
                        floc.SetInt().SetFrom(starts[k]);
                        floc.SetInt().SetTo(ends[k]);
                        floc.SetInt().SetStrand(eNa_strand_plus);

                        CRef<CSeq_loc> new_loc =
                            CSeqUtils::RemapChildToParent(loc, floc);
                        feat->SetLocation(*new_loc);
                    }}

                    // set feature data
                    feat->SetData().SetRegion() = "Repeat: " + rep_summary;

                    // save in annot
                    annot->SetData().SetFtable().push_back(feat);
                }
            }

            // add description to annot
            annot->AddName("n-mer Repeats");
            reply.AddObject(doc, *annot);
        }
        catch (const CException& e) {
            string str = CPluginUtils::GetLabel(loc, &doc.GetScope());
            LOG_POST(Error << "Error processing location " << str
                     << ": " << e.what());
        }
#ifndef _DEBUG
        // release builds only: keep going on any other failure; debug builds
        // let it propagate
        catch (...) {
            string str = CPluginUtils::GetLabel(loc, &doc.GetScope());
            LOG_POST(Error << "Error processing location " << str);
        }
#endif
    }

    //
    // prepare our dialog box
    //
    // trim rows that were preallocated but never filled
    m_Dialog->SetRows(row);
    m_Dialog->Show();

    reply.AddAction(CPluginReplyAction::e_Add_to_document);
    reply.SetStatus(eMessageStatus_success);
}


END_NCBI_SCOPE

/*
 * ===========================================================================
 * $Log: nmer_repeats.cpp,v $
 * Revision 1000.1  2004/06/01 20:55:10  gouriano
 * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.6
 *
 * Revision 1.6  2004/05/21 22:27:46  gorelenk
 * Added PCH ncbi_pch.hpp
 *
 * Revision 1.5  2004/05/03 13:05:42  dicuccio
 * gui/utils --> gui/objutils where needed
 *
 * Revision 1.4  2004/03/05 17:35:37  dicuccio
 * Use sequence::GetId() instead of CSeq_id::GetStringDescr()
 *
 * Revision 1.3  2004/01/27 18:37:47  dicuccio
 * Code clean-up.  Use standard names for plugins.  Removed unnecessary #includes
 *
 * Revision 1.2  2004/01/07 15:50:37  dicuccio
 * Adjusted for API change in CPluginUtils::GetLabel().  Standardized exception
 * reporting in algorithms.
 *
 * Revision 1.1  2003/12/17 17:35:09  jcherry
 * Initial version
 *
 * ===========================================================================
 */
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?