restriction_sites.cpp
来自「ncbi源码」· C++ 代码 · 共 651 行 · 第 1/2 页
CPP
651 行
/*
 * ===========================================================================
 * PRODUCTION $Log: restriction_sites.cpp,v $
 * PRODUCTION Revision 1000.5  2004/06/01 20:55:43  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.32
 * PRODUCTION
 * ===========================================================================
 */

/*  $Id: restriction_sites.cpp,v 1000.5 2004/06/01 20:55:43 gouriano Exp $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Authors:  Josh Cherry
 *
 * File Description:  gbench plugin for finding restriction sites
 *
 */

#include <ncbi_pch.hpp>
#include "restriction_sites.hpp"
#include "rebase.hpp"
#include <algo/sequence/restriction.hpp>
#include <algo/sequence/seq_match.hpp>
#include <algorithm>
#include <corelib/ncbiapp.hpp>
#include <corelib/ncbireg.hpp>
#include <gui/core/plugin_utils.hpp>
#include <gui/utils/system_path.hpp>
#include <gui/core/version.hpp>
#include <gui/dialogs/col/multi_col_dlg.hpp>
#include <gui/plugin/PluginCommandSet.hpp>
#include <gui/plugin/PluginInfo.hpp>
#include <gui/plugin/PluginRequest.hpp>
#include <gui/plugin/PluginValueConstraint.hpp>
#include <gui/utils/message_box.hpp>
#include <gui/objutils/utils.hpp>
#include <objects/general/Dbtag.hpp>
#include <objects/general/Object_id.hpp>
#include <objects/seqfeat/Rsite_ref.hpp>
#include <objects/seqloc/Seq_point.hpp>
#include <objmgr/seq_vector.hpp>
#include <objmgr/util/sequence.hpp>


BEGIN_NCBI_SCOPE


// Destructor; performs no explicit work of its own.
CAlgoPlugin_RestrictionSites::~CAlgoPlugin_RestrictionSites()
{
}


// standard plugin announce boilerplate
//
// Resets 'info', fills in the version/description strings, and registers
// the single "run" command together with its arguments:
//   locs                    - array of CSeq_loc to evaluate
//   which_enzymes           - string, default "commercially available";
//                             allowed: "commercially available",
//                             "prototypes", "all"
//   combine_isoschizomers   - boolean, default "true"
//   sort_by_number_of_sites - boolean, registered without a default
void CAlgoPlugin_RestrictionSites::GetInfo(CPluginInfo& info)
{
    info.Reset();

    // version info macro
    info.SetInfo(CPluginVersion::eMajor, CPluginVersion::eMinor, 0,
                 string(__DATE__) + " " + string(__TIME__),
                 "CAlgoPlugin_RestrictionSites",
                 "Search/Find restriction sites",
                 "Search a DNA sequence for restriction sites",
                 "");

    // command info
    CPluginCommandSet& cmds = info.SetCommands();
    CPluginCommand&    args = cmds.AddAlgoCommand(eAlgoCommand_run);
    args.AddArgument("locs", "Locations to evaluate",
                     CSeq_loc::GetTypeInfo(),
                     CPluginArg::TData::e_Array);
    // NOTE(review): the parenthesised comma lists below presumably rely on
    // an overloaded operator, to append allowed values to the constraint --
    // confirm in PluginValueConstraint.hpp.
    args.SetConstraint("locs",
                       (*CPluginValueConstraint::CreateSeqMol(),
                        CSeq_inst::eMol_na,
                        CSeq_inst::eMol_dna,
                        CSeq_inst::eMol_rna));

    args.AddDefaultArgument("which_enzymes", "Which restriction enzymes",
                            CPluginArg::eString, "commercially available");
    args.SetConstraint("which_enzymes",
                       (*CPluginValueConstraint::CreateSet(),
                        "commercially available", "prototypes", "all"));

    args.AddDefaultArgument("combine_isoschizomers",
                            "Combine isoschizomers",
                            CPluginArg::eBoolean, "true");

    args.AddArgument("sort_by_number_of_sites",
                     "Sort results by number of (definite) cuts by enzyme",
                     CPluginArg::eBoolean);
}


// helper functor for sorting CRef<CREnzResult> by the enzyme name
struct SEnzymeNameCompare
{
    bool operator() (const CRef<CREnzResult>& lhs,
                     const CRef<CREnzResult>& rhs) const
    {
        return lhs->GetEnzymeName() < rhs->GetEnzymeName();
    }
};


// helper functor for sorting CREnzyme by the enzyme name
struct SNameCompare
{
    bool operator() (const CREnzyme& lhs, const CREnzyme& rhs) const
    {
        return lhs.GetName() < rhs.GetName();
    }
};


// helper functor for sorting by the number of definite sites
// (ascending: fewer definite sites compares as "less")
struct SLessDefSites
{
    bool operator() (const CRef<CREnzResult>& lhs,
                     const CRef<CREnzResult>& rhs) const
    {
        return lhs->GetDefiniteSites().size()
            < rhs->GetDefiniteSites().size();
    }
};


// helper functor for sorting CRef<CSeq_loc>s by location
// (ordering is delegated to CSeq_loc::Compare())
struct SLessSeq_loc
{
    bool operator() (const CRef<CSeq_loc>& lhs,
                     const CRef<CSeq_loc>& rhs) const
    {
        return (lhs->Compare(*rhs) < 0);
    }
};


// Append one Rsite feature per entry of 'sites' to the feature table of
// 'annot'.
//
//   sites      - the sites to convert into features
//   result     - supplies the enzyme name used to label each feature
//   annot      - annotation whose feature table receives the new features
//   parent_loc - location the sites were computed on; its id is used for
//                every generated location, and each feature location is
//                passed through CSeqUtils::RemapChildToParent() against it
//   definite   - when false, the label is prefixed with "Possible "
//
// Each feature location is a mix consisting of the recognition-site
// interval plus one point per non-negative plus-strand / minus-strand cut,
// sorted with SLessSeq_loc.  Cuts with a negative position are not emitted;
// they are counted and mentioned in the feature comment instead.
static
void s_AddSitesToAnnot(const vector<CRSite>& sites,
                       const CREnzResult&    result,
                       CSeq_annot&           annot,
                       const CSeq_loc&       parent_loc,
                       bool                  definite = true)
{
    const CSeq_id& id = sequence::GetId(parent_loc);

    ITERATE (vector<CRSite>, site, sites) {
        // create feature
        CRef<CSeq_feat> feat(new CSeq_feat());

        // start to set up Rsite
        // NOTE(review): if CRsite_ref is a choice type, the SetStr() call
        // further down would replace the Db selection made here -- confirm
        // against the Rsite-ref definition.
        feat->SetData().SetRsite().SetDb().SetDb("REBASE");
        feat->SetData().SetRsite().SetDb()
            .SetTag().SetStr("REBASE");

        string str(result.GetEnzymeName());
        if ( !definite ) {
            str = "Possible " + str;
        }
        feat->SetData().SetRsite().SetStr(str);

        //
        // build our location
        //
        vector< CRef<CSeq_loc> > locs;

        // a loc for the recognition site
        CRef<CSeq_loc> recog_site(new CSeq_loc);
        recog_site->SetInt().SetFrom(site->GetStart());
        recog_site->SetInt().SetTo  (site->GetEnd());
        recog_site->SetId(id);
        locs.push_back(recog_site);

        // locs for the cleavage sites
        int negative_cut_locs = 0;  // count these exceptions
        ITERATE (vector<int>, cut, site->GetPlusCuts()) {
            if (*cut >= 0 ) {
                CRef<CSeq_loc> cut_site(new CSeq_loc);
                cut_site->SetPnt().SetPoint(*cut);
                // indicate that the cut is to the "left"
                cut_site->SetPnt()
                    .SetFuzz().SetLim(CInt_fuzz::eLim_tl);
                cut_site->SetPnt().SetStrand(eNa_strand_plus);
                cut_site->SetId(id);
                locs.push_back(cut_site);
            } else {
                negative_cut_locs++;
            }
        }

        ITERATE (vector<int>, cut, site->GetMinusCuts()) {
            if (*cut >= 0 ) {
                CRef<CSeq_loc> cut_site(new CSeq_loc);
                cut_site->SetPnt().SetPoint(*cut);
                // indicate that the cut is to the "left"
                cut_site->SetPnt()
                    .SetFuzz().SetLim(CInt_fuzz::eLim_tl);
                cut_site->SetPnt().SetStrand(eNa_strand_minus);
                cut_site->SetId(id);
                locs.push_back(cut_site);
            } else {
                negative_cut_locs++;
            }
        }

        // comment for those few cases where there are
        // cuts before the sequence begins
        if (negative_cut_locs > 0) {
            string a_comm = NStr::IntToString(negative_cut_locs)
                + " cleavage sites are located before the"
                " beginning of the sequence and are not reported";
            feat->SetComment(a_comm);
        }

        // order the pieces, store them as a mix, then remap the whole
        // location through the parent location
        sort(locs.begin(), locs.end(), SLessSeq_loc());
        copy(locs.begin(), locs.end(),
             back_inserter(feat->SetLocation().SetMix().Set()));
        feat->SetLocation
            (*CSeqUtils::RemapChildToParent(parent_loc,
                                            feat->GetLocation()));

        // save in annot
        annot.SetData().SetFtable().push_back(feat);
    }
}


// Execute the "run" command: load the requested enzyme set, optionally
// combine isoschizomers, prepare the results dialog, then process each
// requested location.
//
// If loading the enzyme data throws, the exception text is shown in a
// message box, the reply status is set to eMessageStatus_failed, and the
// function returns.
void CAlgoPlugin_RestrictionSites::RunCommand(CPluginMessage& msg)
{
    const CPluginCommand& args = msg.GetRequest().GetCommand();
    CPluginReply& reply = msg.SetReply();

    _TRACE("CAlgoPlugin_RestrictionSites::RunCommand()");

    // load patterns from file
    CRebase::EEnzymesToLoad which_enzymes =
        x_DecodeWhichEnzymes(args["which_enzymes"].AsString());
    vector<CREnzyme> enzymes;
    try {
        x_LoadREnzymeData(enzymes, which_enzymes);
    }
    catch (const exception& e) {
        NcbiMessageBox(e.what());
        reply.SetStatus(eMessageStatus_failed);
        return;
    }

    // optionally lump together all enzymes with identical specificities
    if (args["combine_isoschizomers"].AsBoolean()) {
        // first sort alphabetically by enzyme name
        sort(enzymes.begin(), enzymes.end(), SNameCompare());
        // now combine isoschizomers
        CREnzyme::CombineIsoschizomers(enzymes);
    }

    // the dialog is created on first use and kept in m_Dialog afterwards
    if ( !m_Dialog.get() ) {
        m_Dialog.reset(new CMultiColDlg());
        m_Dialog->SetWindowSize(1000, 500);
        m_Dialog->SetTitle("Restriction Sites");
        m_Dialog->SetColumn(0, "Sequence",          FL_ALIGN_LEFT,   0.3f);
        m_Dialog->SetColumn(1, "Location",          FL_ALIGN_LEFT,   0.5f);
        m_Dialog->SetColumn(2, "Enzyme",            FL_ALIGN_LEFT,   0.4f);
        m_Dialog->SetColumn(3, "Number of Sites",   FL_ALIGN_CENTER, 0.5f);
        m_Dialog->SetColumn(4, "Recog. Site Loc.",  FL_ALIGN_CENTER, 0.75f);
        m_Dialog->SetColumn(5, "Plus Strand Cuts",  FL_ALIGN_CENTER, 0.75f);
        m_Dialog->SetColumn(6, "Minus Strand Cuts", FL_ALIGN_CENTER, 0.75f);
    }
    m_Dialog->SetRows(0);  // to clear any previous contents

    int row = 0;
    plugin_args::TLocList locs;
    GetArgValue(args["locs"], locs);
    ITERATE (plugin_args::TLocList, iter, locs) {
        const CSeq_loc&  loc = *iter->second;
        const IDocument& doc = *iter->first;

        // find the best ID for this bioseq
        try {
            CBioseq_Handle handle = doc.GetScope().GetBioseqHandle(loc);

            // get sequence in binary (8na) form
            CSeqVector vec =
                handle.GetSequenceView(loc,
                                       CBioseq_Handle::eViewConstructed,
                                       CBioseq_Handle::eCoding_Ncbi);

            // columns 0 and 1 of the current row: sequence id and location
            string& id_str  = m_Dialog->SetCell(row, 0);
            string& loc_str = m_Dialog->SetCell(row, 1);

            const CSeq_id& best_id =
                sequence::GetId(handle, sequence::eGetId_Best);
            id_str.erase();
            best_id.GetLabel(&id_str);
            loc_str = CPluginUtils::GetLabel(loc, &doc.GetScope());

            // a new feature table
            CRef<CSeq_annot> annot(new CSeq_annot());

            // a place to store results (one per enzyme)
            typedef vector<CRef<CREnzResult> > TResults;
            TResults results;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?