seq_match.hpp
来自「ncbi源码」· HPP 代码 · 共 214 行
HPP
214 行
/* * =========================================================================== * PRODUCTION $Log: seq_match.hpp,v $ * PRODUCTION Revision 1000.0 2003/10/29 19:29:03 gouriano * PRODUCTION PRODUCTION: IMPORTED [ORIGINAL] Dev-tree R1.5 * PRODUCTION * =========================================================================== *//* $Id: seq_match.hpp,v 1000.0 2003/10/29 19:29:03 gouriano Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Authors: Josh Cherry * * File Description: Simple pattern matching for sequences * */#ifndef GUI_CORE_ALGO_BASIC___SEQ_MATCH__HPP#define GUI_CORE_ALGO_BASIC___SEQ_MATCH__HPP#include <corelib/ncbistd.hpp>BEGIN_NCBI_SCOPE////// This class provides functions for determining where/// sequences, perhaps containing ambiguity codes,/// must or can match patterns.////// The functions are highly templatized. The main/// reason is to allow reuse of pattern-matching code with/// different 'alphabets'. A second reason is to allow use/// of different container classes for sequences and patterns,/// e.g., the ncbi8na matching will work with both/// string and vector<char>./// This would be much nicer with template template parameters,/// but MSVC doesn't support them.///class CSeqMatch{public: enum EMatch { eNo, eYes, eMaybe }; /// determine whether ncbi8na base s is a match to q. static EMatch CompareNcbi8na(char s, char q); /// determine whether seq matches pattern pat starting at position pos. /// /// It is the caller's responsibility to ensure that there are /// enough 'characters' in seq, i.e., that pos + pat.size() <= seq.size() template<class Seq, class Pat, class Compare_fun> static EMatch Match(const Seq& seq, const Pat& pat, TSeqPos pos, const Compare_fun compare_fun) { // for efficiency, no check is made that we're not looking // past the end of seq; caller must assure this EMatch rv = eYes; // check pattern positions in succession for (unsigned int i = 0; i < pat.size(); i++) { EMatch res = compare_fun(seq[pos+i], pat[i]); if (res == eNo) { return eNo; } if (res == eMaybe) { rv = eMaybe; } } // if we got here, everybody at least could have matched return rv; } template<class Seq, class Pat> static EMatch MatchNcbi8na(const Seq& seq, const Pat& pat, TSeqPos pos) { return Match(seq, pat, pos, CompareNcbi8na); } template <class Seq, class Pat> struct SMatchNcbi8na { EMatch operator() (const Seq& seq, const Pat& pat, TSeqPos pos) const { return CSeqMatch::Match(seq, pat, pos, CompareNcbi8na); } }; /// find all places where seq must or might match pat template<class Seq, class Pat, class Match_fun> static void FindMatches(const Seq& seq, const Pat& pat, vector<TSeqPos>& definite_matches, vector<TSeqPos>& possible_matches, Match_fun match) { for (unsigned int i = 0; i < seq.size() - pat.size() + 1; i++) { EMatch res = match(seq, pat, i); if (res == eNo) { continue; } if (res == eYes) { definite_matches.push_back(i); continue; } // otherwise must be eMaybe possible_matches.push_back(i); } } template<class Seq, class Pat> static void FindMatchesNcbi8na(const Seq& seq, const Pat& pat, vector<TSeqPos>& definite_matches, vector<TSeqPos>& possible_matches) { FindMatches(seq, pat, definite_matches, possible_matches, SMatchNcbi8na<Seq, Pat>()); } /// stuff for dealing with ncbi8na. /// doesn't really belong here, but oh well /// convert a single base from IUPAC to ncbi8na NCBI_XALGOSEQ_EXPORT static char IupacToNcbi8na(char in); /// convert a whole string from IUPAC to ncbi8na NCBI_XALGOSEQ_EXPORT static void IupacToNcbi8na(const string& in, string& out); /// complement an ncbi8na sequence in place NCBI_XALGOSEQ_EXPORT static void CompNcbi8na(string& seq8na); /// complement a single ncbi8na base NCBI_XALGOSEQ_EXPORT static char CompNcbi8na(char);};// works on ncbi8na// s can match q iff they have some set bits in common// s must match q iff it represents a subset,// i.e., if no bits set in s are unset in qinlineCSeqMatch::EMatch CSeqMatch::CompareNcbi8na(char s, char q){ if (!(s & q)) { // nothing in common return eNo; } if (s & ~q) { return eMaybe; } return eYes;}END_NCBI_SCOPE#endif // GUI_CORE_ALGO_BASIC___SEQ_MATCH__HPP/* * =========================================================================== * $Log: seq_match.hpp,v $ * Revision 1000.0 2003/10/29 19:29:03 gouriano * PRODUCTION: IMPORTED [ORIGINAL] Dev-tree R1.5 * * Revision 1.5 2003/08/18 20:07:04 dicuccio * Corrected export specifiers * * Revision 1.4 2003/08/18 20:01:06 jcherry * Changed function argument name to avoid confusion with std::compare * * Revision 1.3 2003/08/18 19:22:13 jcherry * Moved orf and seq_match to algo/sequence * * Revision 1.2 2003/08/13 16:42:11 dicuccio * Compilation fixes for MSVC * * Revision 1.1 2003/08/12 18:52:58 jcherry * Initial version * * =========================================================================== */
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?