rebase.cpp

来自「ncbi源码」· C++ 代码 · 共 210 行

CPP
210
字号
/* * =========================================================================== * PRODUCTION $Log: rebase.cpp,v $ * PRODUCTION Revision 1000.1  2004/06/01 20:55:41  gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.3 * PRODUCTION * =========================================================================== *//*  $Id: rebase.cpp,v 1000.1 2004/06/01 20:55:41 gouriano Exp $ * =========================================================================== * *                            PUBLIC DOMAIN NOTICE *               National Center for Biotechnology Information * *  This software/database is a "United States Government Work" under the *  terms of the United States Copyright Act.  It was written as part of *  the author's official duties as a United States Government employee and *  thus cannot be copyrighted.  This software/database is freely available *  to the public for use. The National Library of Medicine and the U.S. *  Government have not placed any restriction on its use or reproduction. * *  Although all reasonable efforts have been taken to ensure the accuracy *  and reliability of the software and data, the NLM and the U.S. *  Government do not and cannot warrant the performance or results that *  may be obtained by using this software or data. The NLM and the U.S. *  Government disclaim all warranties, express or implied, including *  warranties of performance, merchantability or fitness for any particular *  purpose. * *  Please cite the author in any work or product based on this material. * * =========================================================================== * * Authors:  Josh Cherry * * File Description:  Code to deal with REBASE restriction enzyme database * */#include <ncbi_pch.hpp>#include <corelib/ncbistd.hpp>#include <algo/sequence/restriction.hpp>#include "rebase.hpp"BEGIN_NCBI_SCOPECRSpec CRebase::MakeRSpec(const string& site){    // site contains a string such as    // GACGTC, GACGT^C, GCAGC(8/12), or (8/13)GAGNNNNNCTC(13/8)        CRSpec spec;    int plus_cut, minus_cut;    string s = site;    if (s[0] == '(') {        string::size_type idx = s.find_first_of(")");        if (idx == std::string::npos) {            throw runtime_error(string("Error parsing site ")                                + site);        }        x_ParseCutPair(s.substr(0, idx + 1), plus_cut, minus_cut);        s.erase(0, idx + 1);        spec.SetPlusCuts().push_back(-plus_cut);        spec.SetMinusCuts().push_back(-minus_cut);    }    if (s[s.length() - 1] == ')') {        string::size_type idx = s.find_last_of("(");        if (idx == std::string::npos) {            throw runtime_error(string("Error parsing site ")                                + site);        }        x_ParseCutPair(s.substr(idx), plus_cut, minus_cut);        s.erase(idx);        spec.SetPlusCuts().push_back(plus_cut + s.length());        spec.SetMinusCuts().push_back(minus_cut + s.length());    }    for (unsigned int i = 0;  i < s.length();  i++) {        if (s[i] == '^') {            // This should be simple.  If we have            // G^GATCC, the cut on the plus strand is            // at 1, and on the minus it's at the            // symmetric position, 5.            // The complication is that TspRI            // is given as CASTGNN^, not NNCASTGNN^.            // Someone someday might write NNCASTGNN^,            // and something like ^NNGATC might arise,            // so code defensively.                        s.erase(i, 1);            int plus_cut = i;            // this is the slightly complicated bit            // trim any leading and trailing N's;            // in case leading N's removed, adjust plus_cut accordingly            string::size_type idx = s.find_first_not_of("N");            if (idx == string::npos) {                // bizarre situation; all N's (possibly empty)                s.erase();                plus_cut = 0;            } else {                s.erase(0, idx);                plus_cut -= idx;            }            idx = s.find_last_not_of("N");            s.erase(idx + 1);            // plus strand cut            spec.SetPlusCuts().push_back(plus_cut);            // symmetric cut on minus strand            spec.SetMinusCuts().push_back(s.length() - plus_cut);            break;  // there better be just one '^'        }    }    spec.SetSeq(s);    return spec;}CREnzyme CRebase::MakeREnzyme(const string& name, const string& sites){    CREnzyme re;    re.SetName(name);        vector<string> site_vec;    NStr::Tokenize(sites, ",", site_vec);    ITERATE (vector<string>, iter, site_vec) {        CRSpec spec = CRebase::MakeRSpec(*iter);        re.SetSpecs().push_back(spec);    }        return re;}void CRebase::x_ParseCutPair(const string& s, int& plus_cut, int& minus_cut){    // s should look like "(8/13)"; plus_cut gets set to 8 and minus_cut to 13    list<string> l;    NStr::Split(s.substr(1, s.length() - 2), "/", l);    if (l.size() != 2) {        throw runtime_error(string("Couldn't parse cut locations ")                            + s);    }    plus_cut = NStr::StringToInt(l.front());    minus_cut = NStr::StringToInt(l.back());}void CRebase::ReadNARFormat(istream& input,                            vector<CREnzyme>& enzymes,                            enum EEnzymesToLoad which){    string line;    CREnzyme enzyme;    string name;    while (getline(input, line)) {        vector<string> fields;        NStr::Tokenize(line, "\t", fields);        // the lines we're interested in have more than one field        if (fields.size() < 2) {            continue;        }        // name of enzyme is in field one or two (field zero is empty)        name = fields[1];        if (name == " ") {            if (which == ePrototype) {                continue;  // skip this non-protype            }            name = fields[2];        }        if (which == eCommercial && fields[5].empty()) {            continue;  // skip this commercially unavailable enzyme        }        string sites = fields[3];  // this contains a comma-separted list of sites,                            // usually just one (but TaqII has two)        enzyme = MakeREnzyme(name, sites);        enzymes.push_back(enzyme);    }}END_NCBI_SCOPE/* * =========================================================================== * $Log: rebase.cpp,v $ * Revision 1000.1  2004/06/01 20:55:41  gouriano * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.3 * * Revision 1.3  2004/05/21 22:27:47  gorelenk * Added PCH ncbi_pch.hpp * * Revision 1.2  2003/08/21 19:24:16  jcherry * Moved restriction site finding to algo/sequence * * Revision 1.1  2003/08/12 18:52:58  jcherry * Initial version * * =========================================================================== */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?