📄 regexp.hpp
字号:
/* * =========================================================================== * PRODUCTION $Log: regexp.hpp,v $ * PRODUCTION Revision 1000.2 2004/06/01 19:38:40 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.10 * PRODUCTION * =========================================================================== */#ifndef UTIL___REGEXP__HPP#define UTIL___REGEXP__HPP/* $Id: regexp.hpp,v 1000.2 2004/06/01 19:38:40 gouriano Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Authors: Clifford Clausen * *//// @file regexp.hpp/// C++ wrappers for the Perl-compatible regular expression (PCRE) library.////// CRegexp - wrapper class for the PCRE library./// CRegexpUtil - utility functions.////// For more details see PCRE documentation: http://www.pcre.org/pcre.txt#include <corelib/ncbistd.hpp>#ifdef NCBI_COMPILER_MSVC#include <util/regexp/pcre.h>#else#include <pcre.h>#endif/** @addtogroup Regexp * * @{ */BEGIN_NCBI_SCOPE/// Specifies the maximum number of subpatterns that can be found.const size_t kRegexpMaxSubPatterns = 100;/////////////////////////////////////////////////////////////////////////////////// CRegexp --////// Define a wrapper class for the Perl-compatible regular expression (PCRE)/// library.////// Internally, this class holds a compiled regular expression used for/// matching with strings passed as an argument to the GetMatch()/// member function. The regular expression is passed as a argument/// to the constructor or to the Set() member function.////// Throw exception on error.class NCBI_XREGEXP_EXPORT CRegexp{public: /// Type definitions used for code clarity. typedef int TCompile; ///< Compilation options. typedef int TMatch; ///< Match options. /// Flags for compile regular expressions. /// /// PCRE compiler flags used in the constructor and in Set(). /// If eCompile_ignore_case is set, matches are case insensitive. /// If eCompile_dotall is set, a dot metacharater in the pattern matches /// all characters, including newlines. Without it, newlines are excluded. /// If eCompile_newline is set then ^ matches the start of a line and /// $ matches the end of a line. If not set, ^ matches only the start /// of the entire string and $ matches only the end of the entire string. /// If eCompile_ungreedy inverts the "greediness" of the quantifiers so /// that they are not greedy by default, but become greedy if followed by /// "?". /// It is not compatible with Perl. /// /// The settings can be changed from within the pattern by a sequence of /// Perl option letters enclosed between "(?" and ")". /// The option letters are: /// i for PCRE_CASELESS /// m for PCRE_MULTILINE /// s for PCRE_DOTALL /// x for PCRE_EXTENDED /// U for PCRE_UNGREEDY enum ECompile { eCompile_default = 0, eCompile_ignore_case = PCRE_CASELESS, eCompile_dotall = PCRE_DOTALL, eCompile_newline = PCRE_MULTILINE, eCompile_ungreedy = PCRE_UNGREEDY }; /// Flags for match string against a pre-compiled pattern. /// /// Setting eMatch_not_begin causes ^ not to match before the /// first character of a line. Without setting eCompile_newline, /// ^ won't match anything if eMatch_not_begin is set. /// Setting eMatch_not_end causes $ not to match immediately before a new /// line. Without setting eCompile_newline, $ won't match anything /// if eMatch_not_end is set. enum EMatch { eMatch_default = 0, eMatch_not_begin = PCRE_NOTBOL, ///< ^ won't match string begin. eMatch_not_end = PCRE_NOTEOL, ///< $ won't match string end. eMatch_not_both = PCRE_NOTBOL | PCRE_NOTEOL }; /// Constructor. /// /// Set and compile the PCRE pattern specified by argument according /// to compile options. Also allocate memory for compiled PCRE. /// @param pattern /// Perl regular expression to compile. /// @param flags /// Regular expression compilation flags. /// @sa /// ECompile CRegexp(const string& pattern, TCompile flags = 0); /// Destructor. /// /// Deallocate compiled Perl-compatible regular expression. virtual ~CRegexp(); /// Set and compile PCRE. /// /// Set and compile the PCRE pattern specified by argument according /// to compile options. Also deallocate/allocate memory for compiled PCRE. /// @param pattern /// Perl regular expression to compile. /// @param flags /// Regular expression compilation flags. /// @sa /// ECompile void Set(const string& pattern, TCompile flags = 0); /// Get matching pattern and subpatterns. /// /// Return a string corresponding to the match to pattern or subpattern. /// Set noreturn to true when GetSub() or GetResults() will be used /// to retrieve pattern and subpatterns. Calling GetMatch() causes /// the entire search to be performed again. If you want to retrieve /// a different pattern/subpattern from an already performed search, /// it is more efficient to use GetSub or GetResults. /// @param str /// String to search. /// @param offset /// Starting offset in str. /// @param idx /// (Sub) match to return. /// Use idx = 0 for complete pattern. Use idx > 0 for subpatterns. /// @param flags /// Flags to match. /// @param noreturn /// Return empty string if noreturn is true. /// @return /// Return (sub) match with number idx or empty string when no match /// found or if noreturn is true. /// @sa /// EMatch, GetSub(), GetResult() string GetMatch( const string& str, TSeqPos offset = 0, size_t idx = 0, TMatch flags = 0, bool noreturn = false ); /// Get pattern/subpattern from previous GetMatch(). /// /// Should only be called after GetMatch() has been called with the /// same string. GetMatch() internally stores locations on string where /// pattern and subpatterns were found. /// @param str /// String to search. /// @param idx /// (Sub) match to return. /// @return /// Return the substring at location of pattern match (idx 0) or /// subpattern match (idx > 0). Return empty string when no match. /// @sa /// GetMatch(), GetResult() string GetSub(const string& str, size_t idx = 0) const; /// Get number of patterns + subpatterns. /// /// @return /// Return the number of patterns + subpatterns found as a result /// of the most recent GetMatch() call. /// @sa /// GetMatch() int NumFound() const; /// Get location of pattern/subpattern. /// /// @param idx /// Index of pattern/subpattern to obtaining. /// Use idx = 0 for pattern, idx > 0 for sub patterns. /// @return /// Return array where index 0 is location of first character in /// pattern/sub pattern and index 1 is 1 beyond last character in /// pattern/sub pattern. /// Throws if called with idx >= NumFound(). /// @sa /// GetMatch(), NumFound() const int* GetResults(size_t idx) const;private: // Disable copy constructor and assignment operator. CRegexp(const CRegexp &); void operator= (const CRegexp &); /// Pointer to compiled PCRE pattern. pcre* m_PReg; /// Array of locations of patterns/subpatterns resulting from /// the last call to GetMatch(). Also contains 1/3 extra space used /// internally by the PCRE C library. int m_Results[(kRegexpMaxSubPatterns +1) * 3]; /// The total number of pattern + subpatterns resulting from /// the last call to GetMatch. int m_NumFound;};/////////////////////////////////////////////////////////////////////////////////// CRegexpUtil --////// Throw exception on error.class NCBI_XREGEXP_EXPORT CRegexpUtil{public: /// Constructor. /// /// Set string for processing. /// @param str /// String to process. /// @sa /// Exists(), Extract(), Replace(), ReplaceRange() CRegexpUtil(const string& str = kEmptyStr); /// Reset the content of the string to process. /// /// @param str /// String to process. /// @sa /// operator = void Reset(const string& str); /// Reset the content of the string to process. /// /// The same as Reset(). /// @param str /// String to process. /// @sa /// Reset() void operator= (const string& str); /// Get result string. /// /// @sa /// operator string string GetResult(void); /// Get result string. /// /// The same as GetResult(). /// @sa /// GetResult() operator string(void); /// Check existence substring which match a specified pattern. /// /// @param pattern /// Perl regular expression to search. /// @param compile_flags /// Regular expression compilation flags.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -