restriction_sites.cpp

来自「ncbi源码」· C++ 代码 · 共 651 行 · 第 1/2 页

CPP
651
字号
/*
 * ===========================================================================
 * PRODUCTION $Log: restriction_sites.cpp,v $
 * PRODUCTION Revision 1000.5  2004/06/01 20:55:43  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.32
 * PRODUCTION
 * ===========================================================================
 */
/*  $Id: restriction_sites.cpp,v 1000.5 2004/06/01 20:55:43 gouriano Exp $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Authors:  Josh Cherry
 *
 * File Description:  gbench plugin for finding restriction sites
 *
 */

#include <ncbi_pch.hpp>
#include "restriction_sites.hpp"
#include "rebase.hpp"
#include <algo/sequence/restriction.hpp>
#include <algo/sequence/seq_match.hpp>
#include <algorithm>
#include <corelib/ncbiapp.hpp>
#include <corelib/ncbireg.hpp>
#include <gui/core/plugin_utils.hpp>
#include <gui/utils/system_path.hpp>
#include <gui/core/version.hpp>
#include <gui/dialogs/col/multi_col_dlg.hpp>
#include <gui/plugin/PluginCommandSet.hpp>
#include <gui/plugin/PluginInfo.hpp>
#include <gui/plugin/PluginRequest.hpp>
#include <gui/plugin/PluginValueConstraint.hpp>
#include <gui/utils/message_box.hpp>
#include <gui/objutils/utils.hpp>
#include <objects/general/Dbtag.hpp>
#include <objects/general/Object_id.hpp>
#include <objects/seqfeat/Rsite_ref.hpp>
#include <objects/seqloc/Seq_point.hpp>
#include <objmgr/seq_vector.hpp>
#include <objmgr/util/sequence.hpp>


BEGIN_NCBI_SCOPE


// Destructor; performs no explicit work.
CAlgoPlugin_RestrictionSites::~CAlgoPlugin_RestrictionSites()
{
}


// standard plugin announce boilerplate
//
// Resets 'info', fills in the version/description strings, and registers
// the arguments of the "run" command:
//   locs                     array of CSeq_loc to evaluate
//   which_enzymes            string, default "commercially available";
//                            constrained to "commercially available",
//                            "prototypes" or "all"
//   combine_isoschizomers    boolean, default "true"
//   sort_by_number_of_sites  boolean, no default given here
void CAlgoPlugin_RestrictionSites::GetInfo(CPluginInfo& info)
{
    info.Reset();

    // version info macro
    info.SetInfo(CPluginVersion::eMajor, CPluginVersion::eMinor, 0,
                 string(__DATE__) + " " + string(__TIME__),
                 "CAlgoPlugin_RestrictionSites", "Search/Find restriction sites",
                 "Search a DNA sequence for restriction sites",
                 "");

    // command info
    CPluginCommandSet& cmds = info.SetCommands();
    CPluginCommand&    args = cmds.AddAlgoCommand(eAlgoCommand_run);
    args.AddArgument("locs", "Locations to evaluate",
                     CSeq_loc::GetTypeInfo(),
                     CPluginArg::TData::e_Array);
    // NOTE(review): the parenthesized comma expressions below presumably rely
    // on an overloaded operator, of CPluginValueConstraint to accumulate the
    // permitted values -- confirm against PluginValueConstraint.hpp.
    args.SetConstraint("locs",
                       (*CPluginValueConstraint::CreateSeqMol(),
                        CSeq_inst::eMol_na,
                        CSeq_inst::eMol_dna,
                        CSeq_inst::eMol_rna));

    args.AddDefaultArgument("which_enzymes", "Which restriction enzymes",
                            CPluginArg::eString, "commercially available");
    args.SetConstraint("which_enzymes", (*CPluginValueConstraint::CreateSet(),
                                "commercially available",
                                "prototypes", "all"));

    args.AddDefaultArgument("combine_isoschizomers",
                            "Combine isoschizomers",
                            CPluginArg::eBoolean, "true");

    args.AddArgument("sort_by_number_of_sites",
                     "Sort results by number of (definite) cuts by enzyme",
                     CPluginArg::eBoolean);
}


// helper functor for sorting CRef<CREnzResult> by the enzyme name
struct SEnzymeNameCompare
{
    bool operator()
        (const CRef<CREnzResult>& lhs, const CRef<CREnzResult>& rhs) const
    {
        return lhs->GetEnzymeName() < rhs->GetEnzymeName();
    }
};


// helper functor for sorting CREnzyme by the enzyme name
struct SNameCompare
{
    bool operator()
        (const CREnzyme& lhs, const CREnzyme& rhs) const
    {
        return lhs.GetName() < rhs.GetName();
    }
};


// helper functor for sorting by the number of definite sites
// (ascending: fewer definite sites compares as "less")
struct SLessDefSites
{
    bool operator()
        (const CRef<CREnzResult>& lhs, const CRef<CREnzResult>& rhs) const
    {
        return lhs->GetDefiniteSites().size() < rhs->GetDefiniteSites().size();
    }
};


// helper functor for sorting CRef<CSeq_loc>s by location
// (ordering is delegated to CSeq_loc::Compare)
struct SLessSeq_loc
{
    bool operator()
        (const CRef<CSeq_loc>& lhs, const CRef<CSeq_loc>& rhs) const
    {
        return (lhs->Compare(*rhs) < 0);
    }
};


// Append one Rsite feature per element of 'sites' to the feature table
// of 'annot'.
//
//   sites       sites to convert into features
//   result      supplies the enzyme name used to label every feature
//   annot       annotation whose feature table receives the new features
//   parent_loc  location the sites were computed on; provides the Seq-id
//               for the generated locations and is the target of the
//               final RemapChildToParent() call
//   definite    when false, the label is prefixed with "Possible "
//
// Each feature location is a mix of the recognition-site interval and one
// point per non-negative plus/minus strand cut, sorted with SLessSeq_loc.
// Cuts with a negative coordinate are not emitted; they are counted and
// mentioned in the feature comment instead.
static
void s_AddSitesToAnnot(const vector<CRSite>& sites,
                       const CREnzResult&    result,
                       CSeq_annot&           annot,
                       const CSeq_loc&       parent_loc,
                       bool                  definite = true)
{
    const CSeq_id& id = sequence::GetId(parent_loc);
    ITERATE (vector<CRSite>, site, sites) {
        // create feature
        CRef<CSeq_feat> feat(new CSeq_feat());

        // start to set up Rsite
        feat->SetData().SetRsite().SetDb().SetDb("REBASE");
        feat->SetData().SetRsite().SetDb()
            .SetTag().SetStr("REBASE");

        string str(result.GetEnzymeName());
        if ( !definite ) {
            str = "Possible " + str;
        }
        // NOTE(review): if Rsite-ref is a choice between 'str' and 'db',
        // this call may discard the Db values set just above -- confirm
        // against the Rsite-ref definition.
        feat->SetData().SetRsite().SetStr(str);

        //
        // build our location
        //
        vector< CRef<CSeq_loc> > locs;

        // a loc for the recognition site
        CRef<CSeq_loc> recog_site(new CSeq_loc);
        recog_site->SetInt().SetFrom(site->GetStart());
        recog_site->SetInt().SetTo  (site->GetEnd());
        recog_site->SetId(id);
        locs.push_back(recog_site);

        // locs for the cleavage sites
        int negative_cut_locs = 0;  // count these exceptions
        ITERATE (vector<int>, cut, site->GetPlusCuts()) {
            if (*cut >= 0 ) {
                CRef<CSeq_loc> cut_site(new CSeq_loc);
                cut_site->SetPnt().SetPoint(*cut);
                // indicate that the cut is to the "left"
                cut_site->SetPnt()
                    .SetFuzz().SetLim(CInt_fuzz::eLim_tl);
                cut_site->SetPnt().SetStrand(eNa_strand_plus);
                cut_site->SetId(id);
                locs.push_back(cut_site);
            } else {
                negative_cut_locs++;
            }
        }

        ITERATE (vector<int>, cut, site->GetMinusCuts()) {
            if (*cut >= 0 ) {
                CRef<CSeq_loc> cut_site(new CSeq_loc);
                cut_site->SetPnt().SetPoint(*cut);
                // indicate that the cut is to the "left"
                cut_site->SetPnt()
                    .SetFuzz().SetLim(CInt_fuzz::eLim_tl);
                cut_site->SetPnt().SetStrand(eNa_strand_minus);
                cut_site->SetId(id);
                locs.push_back(cut_site);
            } else {
                negative_cut_locs++;
            }
        }

        // comment for those few cases where there are
        // cuts before the sequence begins
        if (negative_cut_locs > 0) {
            string a_comm = NStr::IntToString(negative_cut_locs)
                + " cleavage sites are located before the"
                " beginning of the sequence and are not reported";
            feat->SetComment(a_comm);
        }

        // order the pieces, store them as a mix, then express the whole
        // location relative to parent_loc
        sort(locs.begin(), locs.end(), SLessSeq_loc());
        copy(locs.begin(), locs.end(),
             back_inserter(feat->SetLocation().SetMix().Set()));
        feat->SetLocation
            (*CSeqUtils::RemapChildToParent(parent_loc, feat->GetLocation()));

        // save in annot
        annot.SetData().SetFtable().push_back(feat);
    }
}


// Execute the plugin's run command for the request carried by 'msg'.
//
// The steps visible here: load the enzyme set selected by "which_enzymes"
// (on failure show the error in a message box, mark the reply as failed and
// return), optionally combine isoschizomers, create the results dialog on
// first use and clear its rows, then iterate over the "locs" argument.
void CAlgoPlugin_RestrictionSites::RunCommand(CPluginMessage& msg)
{
    const CPluginCommand& args = msg.GetRequest().GetCommand();
    CPluginReply& reply = msg.SetReply();

    _TRACE("CAlgoPlugin_RestrictionSites::RunCommand()");

    // load patterns from file
    CRebase::EEnzymesToLoad which_enzymes
        = x_DecodeWhichEnzymes(args["which_enzymes"].AsString());
    vector<CREnzyme> enzymes;
    try {
        x_LoadREnzymeData(enzymes, which_enzymes);
    }
    catch (const exception& e) {
        NcbiMessageBox(e.what());
        reply.SetStatus(eMessageStatus_failed);
        return;
    }

    // optionally lump together all enzymes with identical specificities
    if (args["combine_isoschizomers"].AsBoolean()) {
        // first sort alphabetically by enzyme name
        sort(enzymes.begin(), enzymes.end(), SNameCompare());
        // now combine isoschizomers
        CREnzyme::CombineIsoschizomers(enzymes);
    }

    // the dialog is created once and reused by later invocations
    if ( !m_Dialog.get() ) {
        m_Dialog.reset(new CMultiColDlg());
        m_Dialog->SetWindowSize(1000, 500);
        m_Dialog->SetTitle("Restriction Sites");

        m_Dialog->SetColumn(0, "Sequence", FL_ALIGN_LEFT, 0.3f);
        m_Dialog->SetColumn(1, "Location", FL_ALIGN_LEFT, 0.5f);
        m_Dialog->SetColumn(2, "Enzyme", FL_ALIGN_LEFT, 0.4f);
        m_Dialog->SetColumn(3, "Number of Sites", FL_ALIGN_CENTER, 0.5f);
        m_Dialog->SetColumn(4, "Recog. Site Loc.", FL_ALIGN_CENTER, 0.75f);
        m_Dialog->SetColumn(5, "Plus Strand Cuts", FL_ALIGN_CENTER, 0.75f);
        m_Dialog->SetColumn(6, "Minus Strand Cuts", FL_ALIGN_CENTER, 0.75f);
    }
    m_Dialog->SetRows(0);  // to clear any previous contents

    int row = 0;
    plugin_args::TLocList locs;
    GetArgValue(args["locs"], locs);
    ITERATE (plugin_args::TLocList, iter, locs) {
        const CSeq_loc&  loc = *iter->second;
        const IDocument& doc = *iter->first;

        // find the best ID for this bioseq
        try {
            CBioseq_Handle handle = doc.GetScope().GetBioseqHandle(loc);

            // get sequence in binary (8na) form
            CSeqVector vec =
                handle.GetSequenceView(loc,
                                       CBioseq_Handle::eViewConstructed,
                                       CBioseq_Handle::eCoding_Ncbi);

            // first two cells of the row: sequence label and location label
            string& id_str  = m_Dialog->SetCell(row, 0);
            string& loc_str = m_Dialog->SetCell(row, 1);
            const CSeq_id& best_id =
                sequence::GetId(handle, sequence::eGetId_Best);
            id_str.erase();
            best_id.GetLabel(&id_str);
            loc_str = CPluginUtils::GetLabel(loc, &doc.GetScope());

            // a new feature table
            CRef<CSeq_annot> annot(new CSeq_annot());

            // a place to store results (one per enzyme)
            typedef vector<CRef<CREnzResult> > TResults;
            TResults results;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?