seq_match.hpp

来自「ncbi源码」· HPP 代码 · 共 214 行

HPP
214
字号
/* * =========================================================================== * PRODUCTION $Log: seq_match.hpp,v $ * PRODUCTION Revision 1000.0  2003/10/29 19:29:03  gouriano * PRODUCTION PRODUCTION: IMPORTED [ORIGINAL] Dev-tree R1.5 * PRODUCTION * =========================================================================== *//*  $Id: seq_match.hpp,v 1000.0 2003/10/29 19:29:03 gouriano Exp $ * =========================================================================== * *                            PUBLIC DOMAIN NOTICE *               National Center for Biotechnology Information * *  This software/database is a "United States Government Work" under the *  terms of the United States Copyright Act.  It was written as part of *  the author's official duties as a United States Government employee and *  thus cannot be copyrighted.  This software/database is freely available *  to the public for use. The National Library of Medicine and the U.S. *  Government have not placed any restriction on its use or reproduction. * *  Although all reasonable efforts have been taken to ensure the accuracy *  and reliability of the software and data, the NLM and the U.S. *  Government do not and cannot warrant the performance or results that *  may be obtained by using this software or data. The NLM and the U.S. *  Government disclaim all warranties, express or implied, including *  warranties of performance, merchantability or fitness for any particular *  purpose. * *  Please cite the author in any work or product based on this material. * * =========================================================================== * * Authors:  Josh Cherry * * File Description:  Simple pattern matching for sequences * */#ifndef GUI_CORE_ALGO_BASIC___SEQ_MATCH__HPP#define GUI_CORE_ALGO_BASIC___SEQ_MATCH__HPP#include <corelib/ncbistd.hpp>BEGIN_NCBI_SCOPE////// This class provides functions for determining where/// sequences, perhaps containing ambiguity codes,/// must or can match patterns.////// The functions are highly templatized.  The main/// reason is to allow reuse of pattern-matching code with/// different 'alphabets'.  A second reason is to allow use/// of different container classes for sequences and patterns,/// e.g., the ncbi8na matching will work with both/// string and vector<char>./// This would be much nicer with template template parameters,/// but MSVC doesn't support them.///class CSeqMatch{public:    enum EMatch {        eNo,        eYes,        eMaybe    };    /// determine whether ncbi8na base s is a match to q.    static EMatch CompareNcbi8na(char s, char q);    /// determine whether seq matches pattern pat starting at position pos.    ///    /// It is the caller's responsibility to ensure that there are    /// enough 'characters' in seq, i.e., that pos + pat.size() <= seq.size()    template<class Seq, class Pat, class Compare_fun>    static EMatch Match(const Seq& seq, const Pat& pat, TSeqPos pos,                        const Compare_fun compare_fun)    {        // for efficiency, no check is made that we're not looking        // past the end of seq; caller must assure this        EMatch rv = eYes;        // check pattern positions in succession        for (unsigned int i = 0;  i < pat.size();  i++) {            EMatch res = compare_fun(seq[pos+i], pat[i]);            if (res == eNo) {                return eNo;            }            if (res == eMaybe) {                rv = eMaybe;            }        }        // if we got here, everybody at least could have matched        return rv;    }    template<class Seq, class Pat>    static EMatch MatchNcbi8na(const Seq& seq,                               const Pat& pat, TSeqPos pos)    {        return Match(seq, pat, pos, CompareNcbi8na);    }    template <class Seq, class Pat>    struct SMatchNcbi8na    {        EMatch operator() (const Seq& seq,                           const Pat& pat, TSeqPos pos) const        {            return CSeqMatch::Match(seq, pat, pos, CompareNcbi8na);        }    };    /// find all places where seq must or might match pat    template<class Seq, class Pat, class Match_fun>    static void FindMatches(const Seq& seq,                            const Pat& pat,                            vector<TSeqPos>& definite_matches,                            vector<TSeqPos>& possible_matches,                            Match_fun match)    {        for (unsigned int i = 0;  i < seq.size() - pat.size() + 1; i++) {            EMatch res = match(seq, pat, i);            if (res == eNo) {                continue;            }            if (res == eYes) {                definite_matches.push_back(i);                continue;            }            // otherwise must be eMaybe            possible_matches.push_back(i);        }    }    template<class Seq, class Pat>    static void FindMatchesNcbi8na(const Seq& seq,                                   const Pat& pat,                                   vector<TSeqPos>& definite_matches,                                   vector<TSeqPos>& possible_matches)    {        FindMatches(seq, pat,                    definite_matches, possible_matches,                    SMatchNcbi8na<Seq, Pat>());    }    /// stuff for dealing with ncbi8na.    /// doesn't really belong here, but oh well    /// convert a single base from IUPAC to ncbi8na    NCBI_XALGOSEQ_EXPORT static char IupacToNcbi8na(char in);    /// convert a whole string from IUPAC to ncbi8na    NCBI_XALGOSEQ_EXPORT static void IupacToNcbi8na(const string& in, string& out);    /// complement an ncbi8na sequence in place    NCBI_XALGOSEQ_EXPORT static void CompNcbi8na(string& seq8na);    /// complement a single ncbi8na base    NCBI_XALGOSEQ_EXPORT static char CompNcbi8na(char);};// works on ncbi8na// s can match q iff they have some set bits in common// s must match q iff it represents a subset,// i.e., if no bits set in s are unset in qinlineCSeqMatch::EMatch CSeqMatch::CompareNcbi8na(char s, char q){    if (!(s & q)) {        // nothing in common        return eNo;    }    if (s & ~q) {        return eMaybe;    }    return eYes;}END_NCBI_SCOPE#endif   // GUI_CORE_ALGO_BASIC___SEQ_MATCH__HPP/* * =========================================================================== * $Log: seq_match.hpp,v $ * Revision 1000.0  2003/10/29 19:29:03  gouriano * PRODUCTION: IMPORTED [ORIGINAL] Dev-tree R1.5 * * Revision 1.5  2003/08/18 20:07:04  dicuccio * Corrected export specifiers * * Revision 1.4  2003/08/18 20:01:06  jcherry * Changed function argument name to avoid confusion with std::compare * * Revision 1.3  2003/08/18 19:22:13  jcherry * Moved orf and seq_match to algo/sequence * * Revision 1.2  2003/08/13 16:42:11  dicuccio * Compilation fixes for MSVC * * Revision 1.1  2003/08/12 18:52:58  jcherry * Initial version * * =========================================================================== */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?