regexx.h

来自「Shorthand是一个强大的脚本语言」· C头文件 代码 · 共 153 行

H
153
字号
/////////////////////////////////////////////////////////////////////////////
// $Header: /shorthand/src/regexx.h 3     8/28/02 6:27a Arm $
//---------------------------------------------------------------------------
// This file is part of "libAndrix" library - a collection of classes
// and functions developed by Andrei Remenchuk.
//---------------------------------------------------------------------------
// While you may own complete copyright on the project with which you have
// received this file, the author reserves the right to use code contained
// in this very file for any purposes, including publishing and usage in
// any free or commercial software.
//
// You may re-distribute this file or re-use it in your own free or
// commercial software provided that this text is included in the file.
// If you change this file you must include clear notice stating that
// you changed this file and the date of change.
//
// This statement doesn't apply to other files that are part of the same
// package unless otherwise noted.
//---------------------------------------------------------------------------
// (c) 1998-2002 Andrei Remenchuk <andrei@remenchuk.com>
//---------------------------------------------------------------------------
// regexx.h - C++ wrapper of GNU regex library
/////////////////////////////////////////////////////////////////////////////
#ifndef __regexx_h
#define __regexx_h

#include "cstring.h"
#include "except.h"


// Callback type accepted by the function-based overload of RX::replace().
// It receives a matched piece of text (`submatch`) and a writable string
// (`target`).
// NOTE(review): presumably the callback writes the replacement text for
// `submatch` into `target`; whether it must append or may overwrite is not
// visible from this header - confirm against the implementation.
typedef void (*replace_function_t)(const string& submatch, string& target);

// Syntax option constants; the values are defined outside this header.
// NOTE(review): the names mirror the GNU regex syntax bits RE_NO_BK_PARENS,
// RE_NO_BK_VBAR, RE_INTERVALS and RE_NO_BK_BRACES, and are presumably meant
// to be OR-ed together and passed as `syntax_options` to the RX constructor -
// confirm against the implementation file.
extern const int RXX_NO_BK_PARENS;
extern const int RXX_NO_BK_VBAR;
extern const int RXX_INTERVALS;
extern const int RXX_NO_BK_BRACES;

/**
 * Regular expression matcher/searcher class.
 *
 * An RX object is built from a pattern string plus integer syntax options.
 * After a successful search()/match() the text captured by each grouping
 * operator '()' can be retrieved with submatch().
 *
 * NOTE(review): a copy constructor and a destructor are declared, but no
 * copy-assignment operator. The class holds raw void* members, so if the
 * destructor releases them, the compiler-generated assignment would copy the
 * raw pointers. Confirm against the implementation whether assignment of RX
 * objects is safe or should be disabled.
 */
class RX
{
protected:
    // pattern text; exposed through pattern()
    string m_pattern;
    // syntax option flags; exposed through options()
    int    m_syntax_options;
    // NOTE(review): presumably the internal copy of the last searched/matched
    // string that submatch() reads from - confirm in the implementation.
    string m_content;
    
    // Opaque pointers to the regex engine state.
    // NOTE(review): presumably the compiled GNU regex pattern buffer and the
    // match registers, kept as void* so that this header does not need to
    // include the regex library header - confirm in the implementation.
    void*  m_pattern_buffer;
    void*  m_registers;

protected:
    
    // NOTE(review): presumably compiles m_pattern using m_syntax_options; the
    // meaning of the bool result is not visible here - confirm.
    bool compile();
    // NOTE(review): presumably the initialization helper shared by the
    // constructors - confirm.
    void ctor();

public:
    
    // constructs RX from the given pattern and syntax options
    RX(const char* pattern, int syntax_options = 0);

    // copy constructor
    RX(const RX& rx);

    // makes a deep copy of another RX
    void aquire(const RX& rx);
    
    // clones this object
    // NOTE(review): returns a raw pointer; presumably the caller owns the
    // returned object and must delete it - confirm.
    RX* clone() const;


    /**
     * Tries to search for the regular expression within the string.
     *
     * If a match was found, internal sub-pattern spaces are set so that
     * you can retrieve each sub-pattern by calling the submatch() method.
     *
     * parameters:
     *    s         [IN] a string to search. RX makes an internal copy of this
     *              string, which is kept until the next call, in order to be
     *              able to extract sub-matches.
     *    size      [IN] presumably the length of s - TODO confirm.
     *    start     [IN] index to start searching from; the return value of a
     *              previous call can be passed here (see below).
     *    make_copy [IN] presumably controls whether the internal copy of s is
     *              made - TODO confirm.
     * return value:
     *    the index within the string where the match ended. This index
     *    can be used as the 'start' parameter in the next successive call to
     *    search().
     *    returns -1 if no match has been found.
     */
    int search(const char* s, int size, int start, bool make_copy = true);

    // Same parameters and return type as search().
    // NOTE(review): the name suggests that failures are reported without
    // throwing an exception - confirm in the implementation.
    int search_no_exception(const char* s, int size, int start, bool make_copy = true);
    
    /**
     * Tries to match the string against this regular expression (GNU behavior).
     * If the string matched, internal sub-pattern spaces are set so that
     * you can retrieve each sub-pattern by calling the submatch() method.
     *
     * parameters:
     *    s [IN] a string to match. RX makes an internal copy of this string,
     *           which is kept until the next call to match(), in order to be
     *           able to extract sub-matches.
     * return value:
     *    true if the string has matched.
     *    false otherwise.
     */
    bool match(const char* s);

    // Convenience overloads of match() taking output strings.
    // NOTE(review): presumably sub1..sub5 (or the elements of subs) receive
    // the text of the corresponding sub-patterns on a successful match -
    // confirm in the implementation.
    bool match(const char* s, string& sub1);
    bool match(const char* s, string& sub1, string& sub2);
    bool match(const char* s, string& sub1, string& sub2, string& sub3);
    bool match(const char* s, string& sub1, string& sub2, string& sub3, string& sub4);
    bool match(const char* s, string& sub1, string& sub2, string& sub3, string& sub4, string& sub5);
    bool match(const char* s, string_array& subs);

    /**
     * Extracts the sub-string that has been matched by one of the grouping
     * operators '()'.
     * Returns true if index is valid.
     * Sub-expression indexes start with 1; zero means the entire matched text.
     * If index is out of range, the output string is emptied.
     * NOTE(review): the meaning of 'offset' is not visible from this header -
     * confirm in the implementation.
     */
    bool submatch(unsigned int index, string& sub, int offset = 0) const;

    /**
     * Same contract as the submatch() overload above, with an additional
     * 'origin' argument.
     * Returns true if index is valid.
     * Sub-expression indexes start with 1; zero means the entire matched text.
     * If index is out of range, the output string is emptied.
     * NOTE(review): presumably 'origin' is the caller's buffer to extract
     * from, for use when no internal copy was made - confirm.
     */
    bool submatch(const char* origin, unsigned int index, string& sub, int offset = 0) const;


    // Replacement of matches found in 'source', with the result going to
    // 'target'. The replacement is either a fixed string or a callback of
    // type replace_function_t.
    // NOTE(review): the meaning of the int result (presumably the number of
    // replacements made) is not visible here - confirm.
    int replace(const char* source, const char* replacement, string& target);
    int replace(const char* source, replace_function_t replacement, string& target);

    // returns the pattern
    // (relies on the project's string type converting to const char*)
    const char* pattern() const { return m_pattern; }

    // returns the syntax options
    int options() const { return m_syntax_options; }

    // frees all internal memory taken by pattern matching
    void shrink();


    // destructor
    ~RX();
};

// De-HTML-ifies a string: replaces character references such as &lt; &gt;
// &#5E, etc. by their literal values, and removes HTML tags.
//   source - the input text
//   target - receives the result

void dehtmlify(const char* source, string& target);



#endif // __regexx_h

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?