nmer_repeats.cpp

来自「ncbi源码」· C++ 代码 · 共 291 行

CPP
291
字号
/*
 * ===========================================================================
 * PRODUCTION $Log: nmer_repeats.cpp,v $
 * PRODUCTION Revision 1000.1  2004/06/01 20:55:10  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.6
 * PRODUCTION
 * ===========================================================================
 */

/*  $Id: nmer_repeats.cpp,v 1000.1 2004/06/01 20:55:10 gouriano Exp $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Authors:  Josh Cherry
 *
 * File Description:  gbench plugin for finding nmer repeats
 *
 */

#include <ncbi_pch.hpp>
#include "nmer_repeats.hpp"
#include <algo/sequence/find_pattern.hpp>
#include <gui/core/plugin_utils.hpp>
#include <gui/core/version.hpp>
#include <gui/dialogs/col/multi_col_dlg.hpp>
#include <gui/plugin/PluginCommandSet.hpp>
#include <gui/plugin/PluginInfo.hpp>
#include <gui/plugin/PluginReply.hpp>
#include <gui/plugin/PluginRequest.hpp>
#include <gui/plugin/PluginValueConstraint.hpp>
#include <gui/objutils/utils.hpp>

#include <objects/seqloc/Seq_interval.hpp>
#include <objmgr/seq_vector.hpp>
#include <objmgr/util/sequence.hpp>


BEGIN_NCBI_SCOPE
USING_SCOPE(objects);


/// Destructor; performs no explicit work of its own.
CAlgoPlugin_NmerRepeats::~CAlgoPlugin_NmerRepeats()
{
}


/// Standard plugin announce boilerplate.
///
/// Resets @a info, fills in the version/name/menu/description fields, and
/// registers a single "run" algorithm command with these arguments:
///   - "locs"          array of CSeq_loc to evaluate (constrained below)
///   - "dinuc_min"     integer, default "5"
///   - "trinuc_min"    integer, default "5"
///   - "tetranuc_min"  integer, default "5"
///
/// @param info  Plugin description object to populate.
void CAlgoPlugin_NmerRepeats::GetInfo(CPluginInfo& info)
{
    info.Reset();

    // version info macro
    info.SetInfo(CPluginVersion::eMajor, CPluginVersion::eMinor, 0,
                 string(__DATE__) + " " + string(__TIME__),
                 "CAlgoPlugin_NmerRepeats", "Search/Find n-mer repeats",
                 "Find di-, tri-, and tetra-nucleotide repeats",
                 "");

    // command info
    CPluginCommandSet& cmds = info.SetCommands();
    CPluginCommand&    args = cmds.AddAlgoCommand(eAlgoCommand_run);

    args.AddArgument("locs", "Locations to evaluate",
                     CSeq_loc::GetTypeInfo(),
                     CPluginArg::TData::e_Array);

    // NOTE(review): the parenthesised comma list presumably relies on an
    // overloaded operator, on the constraint type to accumulate the allowed
    // molecule types; with the built-in comma operator only the last value
    // would reach SetConstraint() -- confirm against CPluginValueConstraint.
    args.SetConstraint("locs",
                       (*CPluginValueConstraint::CreateSeqMol(),
                        CSeq_inst::eMol_na,
                        CSeq_inst::eMol_dna,
                        CSeq_inst::eMol_rna));

    // NOTE(review): "dinuclotide" / "trinuclotide" / "tetranuclotide" are
    // misspelled in the labels below; they are runtime strings and are left
    // unchanged here.
    args.AddDefaultArgument("dinuc_min",
                            "Minimum number of dinuclotide units",
                            CPluginArg::eInteger, "5");
    args.AddDefaultArgument("trinuc_min",
                            "Minimum number of trinuclotide units",
                            CPluginArg::eInteger, "5");
    args.AddDefaultArgument("tetranuc_min",
                            "Minimum number of tetranuclotide units",
                            CPluginArg::eInteger, "5");
}


/// Execute the "run" command.
///
/// For every location in the "locs" argument the sequence is fetched in
/// IUPAC coding and searched for repeats with unit length 2, 3 and 4, using
/// the per-length minimum taken from the command arguments.  Each reported
/// repeat is written as a row of the results dialog and added as a region
/// feature to a CSeq_annot named "n-mer Repeats", which is attached to the
/// reply for the owning document.
///
/// Repeats whose unit is a single repeated base are skipped, as are
/// 4-base units that are themselves a repeated 2-base unit.
///
/// Exceptions raised while processing one location are logged and do not
/// stop processing of the remaining locations (the catch-all handler is
/// compiled only when _DEBUG is not defined).
///
/// @param msg  Plugin message carrying the request; its reply is filled in.
void CAlgoPlugin_NmerRepeats::RunCommand(CPluginMessage& msg)
{
    const CPluginCommand& args = msg.GetRequest().GetCommand();
    CPluginReply& reply = msg.SetReply();

    _TRACE("CAlgoPlugin_NmerRepeats::Run()");

    // indexed directly by repeat-unit length (2..4); slots 0 and 1 are unused
    vector<int> minima(5);
    minima[2] = args["dinuc_min"].AsInteger();
    minima[3] = args["trinuc_min"].AsInteger();
    minima[4] = args["tetranuc_min"].AsInteger();

    // create the results dialog on first use only
    if ( !m_Dialog.get() ) {
        m_Dialog.reset(new CMultiColDlg());
        m_Dialog->SetWindowSize(600, 350);
        m_Dialog->SetTitle("n-mer Nucleotide Repeats");
        m_Dialog->SetLabel("Search results:");
        m_Dialog->SetColumn(0, "Sequence");
        m_Dialog->SetColumn(1, "Location");
        m_Dialog->SetColumn(2, "Position", FL_ALIGN_CENTER, 2.0);
        m_Dialog->SetColumn(3, "Repeat", FL_ALIGN_LEFT);
    }

    // clear any previous contents
    m_Dialog->SetRows(0);

    // scratch buffers reused across locations and unit lengths
    vector<TSeqPos> starts;
    vector<TSeqPos> ends;

    // next dialog row to fill; also the final row count
    int row = 0;

    plugin_args::TLocList locs;
    GetArgValue(args["locs"], locs);

    ITERATE (plugin_args::TLocList, iter, locs) {
        const CSeq_loc&  loc = *iter->second;
        const IDocument& doc = *iter->first;

        try {
            CBioseq_Handle handle = doc.GetScope().GetBioseqHandle(loc);
            CSeqVector vec =
                handle.GetSequenceView(loc,
                                       CBioseq_Handle::eViewConstructed,
                                       CBioseq_Handle::eCoding_Iupac);
            string seq;
            vec.GetSeqData(static_cast<TSeqPos>(0), vec.size(), seq);

            // Sequence / Location columns are written once per location,
            // into the row that will hold this location's first repeat.
            string& id_str  = m_Dialog->SetCell(row, 0);
            string& loc_str = m_Dialog->SetCell(row, 1);

            // find the best ID for this bioseq
            const CSeq_id& best_id =
                sequence::GetId(handle, sequence::eGetId_Best);
            id_str.erase();
            best_id.GetLabel(&id_str);

            loc_str = CPluginUtils::GetLabel(loc, &doc.GetScope());

            CRef<CSeq_annot> annot(new CSeq_annot());

            for (unsigned int n = 2;  n <= 4;  ++n) {
                starts.clear();
                ends.clear();
                CFindPattern::FindNucNmerRepeats(seq, n, minima[n],
                                                 starts, ends);

                // preallocate rows in dialog for speed
                m_Dialog->SetRows(row + starts.size());

                for (size_t k = 0;  k < starts.size();  ++k) {
                    const string rep_unit = seq.substr(starts[k], n);

                    // skip it if it's a monomer repeat
                    // (every base equals the first one)
                    if (rep_unit.find_first_not_of(rep_unit[0], 1)
                        == string::npos) {
                        continue;
                    }

                    // a 4-mer that is really a repeated dimer is skipped too
                    if (n == 4  &&
                        rep_unit[2] == rep_unit[0]  &&
                        rep_unit[3] == rep_unit[1]) {
                        continue;
                    }

                    // number of whole units in the inclusive [start, end] span
                    const int rep_num = (ends[k] - starts[k] + 1) / n;
                    const string rep_summary =
                        "(" + rep_unit + ")" + NStr::IntToString(rep_num);

                    string& pos_str = m_Dialog->SetCell(row, 2);
                    // 1-based indexing for dialog
                    pos_str = NStr::IntToString(starts[k] + 1) + " - "
                        + NStr::IntToString(ends[k] + 1);
                    m_Dialog->SetCell(row, 3) = rep_summary;

                    ++row;

                    // create feature
                    CRef<CSeq_feat> feat(new CSeq_feat());

                    // set correct location: build an interval in the
                    // coordinates of the searched sequence, then remap it
                    // through the input location
                    {{
                        CSeq_loc& floc = feat->SetLocation();
                        floc.SetInt().SetId().Assign(sequence::GetId(loc));
                        floc.SetInt().SetFrom(starts[k]);
                        floc.SetInt().SetTo(ends[k]);
                        floc.SetInt().SetStrand(eNa_strand_plus);

                        CRef<CSeq_loc> new_loc =
                            CSeqUtils::RemapChildToParent(loc, floc);
                        feat->SetLocation(*new_loc);
                    }}

                    // set feature data
                    feat->SetData().SetRegion() = "Repeat: " + rep_summary;

                    // save in annot
                    annot->SetData().SetFtable().push_back(feat);
                }
            }

            // add description to annot
            annot->AddName("n-mer Repeats");

            reply.AddObject(doc, *annot);
        }
        catch (const CException& e) {
            string str = CPluginUtils::GetLabel(loc, &doc.GetScope());
            LOG_POST(Error << "Error processing location " << str
                     << ": " << e.what());
        }
#ifndef _DEBUG
        catch (...) {
            string str = CPluginUtils::GetLabel(loc, &doc.GetScope());
            LOG_POST(Error << "Error processing location " << str);
        }
#endif
    }

    //
    // prepare our dialog box
    //
    // trim rows that were preallocated but never filled
    m_Dialog->SetRows(row);
    m_Dialog->Show();

    reply.AddAction(CPluginReplyAction::e_Add_to_document);
    reply.SetStatus(eMessageStatus_success);
}


END_NCBI_SCOPE

/*
 * ===========================================================================
 * $Log: nmer_repeats.cpp,v $
 * Revision 1000.1  2004/06/01 20:55:10  gouriano
 * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.6
 *
 * Revision 1.6  2004/05/21 22:27:46  gorelenk
 * Added PCH ncbi_pch.hpp
 *
 * Revision 1.5  2004/05/03 13:05:42  dicuccio
 * gui/utils --> gui/objutils where needed
 *
 * Revision 1.4  2004/03/05 17:35:37  dicuccio
 * Use sequence::GetId() instead of CSeq_id::GetStringDescr()
 *
 * Revision 1.3  2004/01/27 18:37:47  dicuccio
 * Code clean-up.  Use standard names for plugins.  Removed unnecessary #includes
 *
 * Revision 1.2  2004/01/07 15:50:37  dicuccio
 * Adjusted for API change in CPluginUtils::GetLabel().  Standardized exception
 * reporting in algorithms.
 *
 * Revision 1.1  2003/12/17 17:35:09  jcherry
 * Initial version
 *
 * ===========================================================================
 */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?