📄 restriction.cpp

📁 ncbi源码
💻 CPP
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* * =========================================================================== * PRODUCTION $Log: restriction.cpp,v $ * PRODUCTION Revision 1000.1  2004/06/01 18:10:51  gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.11 * PRODUCTION * =========================================================================== *//*  $Id: restriction.cpp,v 1000.1 2004/06/01 18:10:51 gouriano Exp $ * =========================================================================== * *                            PUBLIC DOMAIN NOTICE *               National Center for Biotechnology Information * *  This software/database is a "United States Government Work" under the *  terms of the United States Copyright Act.  It was written as part of *  the author's official duties as a United States Government employee and *  thus cannot be copyrighted.  This software/database is freely available *  to the public for use. The National Library of Medicine and the U.S. *  Government have not placed any restriction on its use or reproduction. * *  Although all reasonable efforts have been taken to ensure the accuracy *  and reliability of the software and data, the NLM and the U.S. *  Government do not and cannot warrant the performance or results that *  may be obtained by using this software or data. The NLM and the U.S. *  Government disclaim all warranties, express or implied, including *  warranties of performance, merchantability or fitness for any particular *  purpose. * *  Please cite the author in any work or product based on this material. * * =========================================================================== * * Authors:  Josh Cherry * * File Description:  Classes for representing and finding restriction sites * */#include <ncbi_pch.hpp>#include <corelib/ncbistd.hpp>#include <objmgr/seq_vector.hpp>#include <algorithm>#include <algo/sequence/restriction.hpp>BEGIN_NCBI_SCOPEUSING_SCOPE(objects);bool CRSpec::operator<(const CRSpec& rhs) const{    if (GetSeq() != rhs.GetSeq()) {        return GetSeq() < rhs.GetSeq();    }    // otherwise sequences identical    if (GetPlusCuts() != rhs.GetPlusCuts()) {        return GetPlusCuts() < rhs.GetPlusCuts();    }    if (GetMinusCuts() != rhs.GetMinusCuts()) {        return GetMinusCuts() < rhs.GetMinusCuts();    }    // otherwise arguments are equal, and < is false    return false;}void CREnzyme::Reset(void){    m_Name.erase();    m_Specs.clear();}// helper functor for sorting enzymes by specificitystruct SCompareSpecs{    bool operator()(const CREnzyme& lhs, const CREnzyme& rhs)    {        return lhs.GetSpecs() < rhs.GetSpecs();    }};void CREnzyme::CombineIsoschizomers(vector<CREnzyme>& enzymes){    stable_sort(enzymes.begin(), enzymes.end(), SCompareSpecs());    vector<CREnzyme> result;    ITERATE (vector<CREnzyme>, enzyme, enzymes) {        if (enzyme != enzymes.begin() &&            enzyme->GetSpecs() == result.back().GetSpecs()) {            result.back().SetName() += "/";            result.back().SetName() += enzyme->GetName();        } else {            result.push_back(*enzyme);        }    }    swap(enzymes, result);}void CRSpec::Reset(void){    m_Seq.erase();    m_PlusCuts.clear();    m_MinusCuts.clear();}ostream& operator<<(ostream& os, const CRSite& site){    os << "Recog. site: " << site.GetStart() << '-'       << site.GetEnd() << endl;    os << "Plus strand cuts: ";    string s;    ITERATE (vector<int>, cut, site.GetPlusCuts()) {        if (!s.empty()) {            s += " ,";        }        s += NStr::IntToString(*cut);    }    os << s << endl;    os << "Minus strand cuts: ";    s.erase();    ITERATE (vector<int>, cut, site.GetMinusCuts()) {        if (!s.empty()) {            s += " ,";        }        s += NStr::IntToString(*cut);    }    os << s << endl;        return os;    }ostream& operator<<(ostream& os, const CREnzResult& er){    os << "Enzyme: " << er.GetEnzymeName() << endl;    os << er.GetDefiniteSites().size() << " definite sites:" << endl;    ITERATE (vector<CRSite>, site, er.GetDefiniteSites()) {        os << *site;    }    os << er.GetPossibleSites().size() << " possible sites:" << endl;    ITERATE (vector<CRSite>, site, er.GetPossibleSites()) {        os << *site;    }    return os;}struct SCompareLocation{    bool operator() (const CRSite& lhs, const CRSite& rhs) const    {        return lhs.GetStart() < rhs.GetStart();    }};// Class for internal use by restriction site finding.// Holds a pattern in ncbi8na, integers that specify// which enzyme (of a sequential collection) // and which specifity of that// enzyme it represents, a field indicating// whether it represents the complement of the specificity,// and the length of the initial subpattern that was// put into the fsm.class CPatternRec{public:    enum EIsComplement {        eIsNotComplement,        eIsComplement    };    CPatternRec(string pattern, int enzyme_index, int spec_index,                EIsComplement is_complement,                unsigned int fsm_pat_size) : m_Pattern(pattern),                                             m_EnzymeIndex(enzyme_index),                                             m_SpecIndex(spec_index),                                             m_IsComplement(is_complement),                                             m_FsmPatSize(fsm_pat_size) {}    // member access    const string& GetPattern(void) const {return m_Pattern;}    int GetEnzymeIndex(void) const {return m_EnzymeIndex;}    int GetSpecIndex(void) const {return m_SpecIndex;}    EIsComplement GetIsComlement(void) const {return m_IsComplement;}    bool IsComplement(void) const {return m_IsComplement == eIsComplement;}    unsigned int GetFsmPatSize(void) const {return m_FsmPatSize;}private:    // pattern in ncbi8na    string m_Pattern;    // which enzyme and specificity we represent    int m_EnzymeIndex;    int m_SpecIndex;    // whether we represent the complement of the specificity    EIsComplement m_IsComplement;    unsigned int m_FsmPatSize;};void CFindRSites::x_ExpandRecursion(string& s, unsigned int pos,                                    CTextFsm<int>& fsm, int match_value){    if (pos == s.size()) {        // this is the place        fsm.AddWord(s, match_value);        return;    }    char orig_ch = s[pos];    for (char x = 1;  x <= 8;  x <<= 1) {        if (orig_ch & x) {            s[pos] = x;            x_ExpandRecursion(s, pos + 1, fsm, match_value);        }    }    // restore original character    s[pos] = orig_ch;}void CFindRSites::x_AddPattern(const string& pat, CTextFsm<int>& fsm,                               int match_value){    string s = pat;    x_ExpandRecursion(s, 0, fsm, match_value);}bool CFindRSites::x_IsAmbig(char nuc){    static const bool ambig_table[16] = {        0, 0, 0, 1, 0, 1, 1, 1,        0, 1, 1, 1, 1, 1, 1, 1    };    return ambig_table[nuc];}/// Find all definite and possible sites in a sequence/// for a vector of enzymes, using a finite state machine./// Templatized so that seq can be various containers/// (e.g., string, vector<char>, CSeqVector),/// but it must yield ncbi8na.template<class Seq>void x_FindRSite(const Seq& seq, const vector<CREnzyme>& enzymes,                 vector<CRef<CREnzResult> >& results){    results.clear();    results.reserve(enzymes.size());    // vector of patterns for internal use    vector<CPatternRec> patterns;    patterns.reserve(enzymes.size());  // an underestimate        // the finite state machine for the search    CTextFsm<int> fsm;    // iterate over enzymes    ITERATE (vector<CREnzyme>, enzyme, enzymes) {        results.push_back(CRef<CREnzResult>                           (new CREnzResult(enzyme->GetName())));        string pat;        const vector<CRSpec>& specs = enzyme->GetSpecs();        // iterate over specificities for this enzyme        ITERATE (vector<CRSpec>, spec, specs) {            // one or two patterns for this specificity            // (one for pallindrome, two otherwise)                        // to avoid combinatorial explosion,            // if there are more than two Ns only use            // part of pattern before first N            CSeqMatch::IupacToNcbi8na(spec->GetSeq(), pat);            SIZE_TYPE fsm_pat_size = pat.find_first_of(0x0f);            {{                SIZE_TYPE pos = pat.find_first_of(0x0f, fsm_pat_size + 1);                if (pos == NPOS                    ||  pat.find_first_of(0x0f, pos + 1) == NPOS) {                    fsm_pat_size = pat.size();                }            }}            patterns.push_back(CPatternRec(pat, enzyme - enzymes.begin(),                                           spec - specs.begin(),                                            CPatternRec::eIsNotComplement,
12 下一页
💿 文件大小 13605 K
👤 上传用户 lovely19891019
📂 所属分类生物技术
🏷️ 相关标签

#ncbi #源码
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -