首页 › 资源下载 › 生物技术 › ncbi源码 › 源码查看
restriction.cpp

来自「ncbi源码」· C++ 代码 · 共 585 行 · 第 1/2 页
CPP
585 行
                                           fsm_pat_size));            // add pattern to fsm            // (add only fsm_pat_size of it)            CFindRSites::x_AddPattern(pat.substr(0, fsm_pat_size), fsm,                                      patterns.size() - 1);            // if the pattern is not pallindromic,            // do a search for its complement too            string comp = pat;            CSeqMatch::CompNcbi8na(comp);            if (comp != pat) {                {{                    fsm_pat_size = comp.find_first_of(0x0f);                    SIZE_TYPE pos = comp.find_first_of(0x0f, fsm_pat_size + 1);                    if (pos == NPOS                        ||  comp.find_first_of(0x0f, pos + 1) == NPOS) {                        fsm_pat_size = comp.size();                    }                }}                patterns.push_back(CPatternRec(comp, enzyme                                                - enzymes.begin(),                                               spec - specs.begin(),                                                CPatternRec::eIsComplement,                                               fsm_pat_size));                // add pattern to fsm                // (add only fsm_pat_size of it)                CFindRSites::x_AddPattern(comp.substr(0, fsm_pat_size), fsm,                                          patterns.size() - 1);            }        }    }        // The fsm is set up.    // Now do the search.    fsm.Prime();    vector<int> ambig_nucs;  // for dealing with ambiguities later    int state = fsm.GetInitialState();    for (unsigned int i = 0;  i < seq.size();  i++) {        if (CFindRSites::x_IsAmbig(seq[i])) {            ambig_nucs.push_back(i);        }        state = fsm.GetNextState(state, seq[i]);        if (fsm.IsMatchFound(state)) {            const vector<int>& matches = fsm.GetMatches(state);            ITERATE (vector<int>, match, matches) {                const CPatternRec& pattern = patterns[*match];                TSeqPos begin_pos = i + 1 - pattern.GetFsmPatSize();                TSeqPos end_pos = begin_pos                     + pattern.GetPattern().size() - 1;                                // check for a full match to sequence                if (pattern.GetFsmPatSize()                    != pattern.GetPattern().size()) {                    // Pattern put into fsm was less than the full pattern.                    // Must check that the sequence really matches.                    if (end_pos >= seq.size()) {                        // full pattern goes off the end of sequence;                        // not a match                        continue;                    }                    // could really test match for just                     // right end of pattern here                    if (CSeqMatch::MatchNcbi8na(seq, pattern.GetPattern(),                                                begin_pos)                         == CSeqMatch::eYes) {                        // There must be a site here.  However, because                        // we'll later check all stretches containing                        // ambiguities, we must ignore them here to keep                        // from recording them twice.                        bool ambig = false;                        for (unsigned int n = begin_pos;  n <= end_pos;                             n++) {                            if (CFindRSites::x_IsAmbig(seq[n])) {                                ambig = true;                                break;                            }                        }                        if (ambig) {                            continue;  // ignore matches with ambig. seq.                        }                    } else {                        continue;  // not a real match                    }                }                CRSite site(begin_pos, end_pos);                const CRSpec& spec = enzymes[pattern.GetEnzymeIndex()]                    .GetSpecs()[pattern.GetSpecIndex()];                const vector<int>& plus_cuts = spec.GetPlusCuts();                ITERATE (vector<int>, cut, plus_cuts) {                    if (pattern.IsComplement()) {                        site.SetPlusCuts()                            .push_back(begin_pos                                        + pattern.GetPattern().size()                                       - *cut);                    } else {                        site.SetPlusCuts().push_back(begin_pos + *cut);                    }                }                const vector<int>& minus_cuts = spec.GetMinusCuts();                ITERATE (vector<int>, cut, minus_cuts) {                    if (pattern.IsComplement()) {                        site.SetMinusCuts()                            .push_back(begin_pos                                       + pattern.GetPattern().size()                                       - *cut);                    } else {                        site.SetMinusCuts().push_back(begin_pos + *cut);                    }                }                results[pattern.GetEnzymeIndex()]->SetDefiniteSites()                    .push_back(site);            }        }    }  // end iteration over the sequence    // We've found all the matches involving unambiguous characters    // in sequence.  Now go back and examine any ambiguous places.    if (!ambig_nucs.empty()) {        ITERATE (vector<CPatternRec>, pattern, patterns) {            const string& pat = pattern->GetPattern();            // for reordering later            int ds_pos = results[pattern->GetEnzymeIndex()]                ->GetDefiniteSites().size();            int ps_pos = results[pattern->GetEnzymeIndex()]                ->GetPossibleSites().size();            // the next possible (starting) position to check            int next_pos = 0;            // iterate over ambiguous positions            ITERATE (vector<int>, pos, ambig_nucs) {                int begin_check = *pos - pat.size() + 1;                // don't try to check a negative position                begin_check = max(begin_check, 0);                // to avoid recording a site multiple times                // when there are two nearby ambiguities                begin_check = max(begin_check, next_pos);                int end_check = min(*pos, (int) (seq.size() - pat.size()));                int i;                for (i = begin_check;  i <= end_check;  i++) {                    CSeqMatch::EMatch match                        = CSeqMatch::MatchNcbi8na(seq, pat, i);                    if (match == CSeqMatch::eNo) {                        continue;  // no match of any sort                    }                    // otherwise, a definite or possible site                    CRSite site(i, i + pat.size() -1);                    // figure out cleavage locations                    const vector<int>& plus_cuts                         = enzymes[pattern->GetEnzymeIndex()]                        .GetSpecs()[pattern->GetSpecIndex()].GetPlusCuts();                    ITERATE (vector<int>, cut, plus_cuts) {                        if (pattern->IsComplement()) {                            site.SetPlusCuts()                                .push_back(i + pattern->GetPattern().size()                                           - *cut);                        } else {                            site.SetPlusCuts().push_back(i + *cut);                        }                    }                    const vector<int>& minus_cuts                         = enzymes[pattern->GetEnzymeIndex()]                        .GetSpecs()[pattern->GetSpecIndex()]                        .GetMinusCuts();                    ITERATE (vector<int>, cut, minus_cuts) {                        if (pattern->IsComplement()) {                            site.SetMinusCuts()                                .push_back(i + pattern->GetPattern().size()                                           - *cut);                        } else {                            site.SetMinusCuts().push_back(i + *cut);                        }                    }                    if (match == CSeqMatch::eYes) {                        results[pattern->GetEnzymeIndex()]                            ->SetDefiniteSites().push_back(site);                    } else {                        results[pattern->GetEnzymeIndex()]                            ->SetPossibleSites().push_back(site);                    }                }                next_pos = i;            }  // end ITERATE over ambiguous positions                        // definite_sites and possible_sites may be out of order            vector<CRSite>& def_sites = results[pattern->GetEnzymeIndex()]                ->SetDefiniteSites();            inplace_merge(def_sites.begin(),                          def_sites.begin() + ds_pos,                          def_sites.end(),                          SCompareLocation());                        vector<CRSite>& pos_sites = results[pattern->GetEnzymeIndex()]                ->SetPossibleSites();            inplace_merge(pos_sites.begin(),                          pos_sites.begin() + ps_pos,                          pos_sites.end(),                          SCompareLocation());                    }  // end ITERATE over patterns    }  // end if (!ambig_nucs.empty())}// CFindRSites::Find for various sequence containersvoid CFindRSites::Find(const string& seq,                       const vector<CREnzyme>& enzymes,                       vector<CRef<CREnzResult> >& results){    x_FindRSite(seq, enzymes, results);}void CFindRSites::Find(const vector<char>& seq,                       const vector<CREnzyme>& enzymes,                       vector<CRef<CREnzResult> >& results){    x_FindRSite(seq, enzymes, results);}void CFindRSites::Find(const CSeqVector& seq,                       const vector<CREnzyme>& enzymes,                       vector<CRef<CREnzResult> >& results){    string seq_ncbi8na;    CSeqVector vec(seq);    vec.SetNcbiCoding();    vec.GetSeqData(0, vec.size(), seq_ncbi8na);    x_FindRSite(seq_ncbi8na, enzymes, results);}END_NCBI_SCOPE/* * =========================================================================== * $Log: restriction.cpp,v $ * Revision 1000.1  2004/06/01 18:10:51  gouriano * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.11 * * Revision 1.11  2004/05/21 21:41:04  gorelenk * Added PCH ncbi_pch.hpp * * Revision 1.10  2003/08/28 15:49:43  jcherry * Fixed problem with x_FindRSite; pattern was being searched before * it was read. * * Revision 1.9  2003/08/22 14:25:58  ucko * Fix for MSVC, which seems to have problems with member templates. * * Revision 1.8  2003/08/22 02:17:18  ucko * Fix WorkShop compilation. * * Revision 1.7  2003/08/21 20:05:59  dicuccio * Added USING_SCOPE(objects) for MSVC * * Revision 1.6  2003/08/21 19:22:47  jcherry * Moved restriction site finding to algo/sequence * * Revision 1.5  2003/08/21 18:38:31  jcherry * Overloaded CFindRSites::Find to take several sequence containers. * Added option to lump together enzymes with identical specificities. * * Revision 1.4  2003/08/21 12:03:07  dicuccio * Make use of new typedef in plugin_utils.hpp for argument values. * * Revision 1.3  2003/08/20 22:57:44  jcherry * Reimplemented restriction site finding using finite state machine * * Revision 1.2  2003/08/17 19:25:30  jcherry * Changed member variable names to follow convention * * Revision 1.1  2003/08/12 18:52:58  jcherry * Initial version * * =========================================================================== */
restriction.cpp - 源码说明

本页面展示了「ncbi源码」中的 restriction.cpp 源码文件，采用 C++ 编程语言编写，共 585 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与ncbi相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?