⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 reparse.h

📁 功能比较强的正则表达式分析器
💻 H
字号:
//---------------------------------------------------------------------------

#ifndef REparse2H
#define REparse2H
//---------------------------------------------------------------------------

#include <contnrs\lists.h>
#include <contnrs\stacks.h>
#include <contnrs\trees.h>
#include <contnrs\ref.h>

// Build-configuration switches for this header.
//
// __REparse_debug__ (disabled here): when defined, MachineCore additionally
// declares isEnd()/viewTransfer()/viewFromState()/viewTree()/view() and
// Parser declares viewMachine().
// #define __REparse_debug__
// __REparse_optimization__: not referenced anywhere in this header;
// presumably consumed by the implementation file -- TODO confirm.
#define __REparse_optimization__
// __REmachine_as_Reference__: when defined, Machine is a separate class
// derived from Reference<MachineCore> and Parser derives from
// Reference<MachineCore>; when undefined, Machine is a plain typedef of
// MachineCore and Parser stores a Machine pointer instead.
#define __REmachine_as_Reference__

////////////////////////////////
namespace REParse {
////////////////////////////////

// Make the container namespaces visible inside REParse so that names such as
// ContiguousList, ContiguousStack, Trie and TrieNode can be written
// unqualified below (presumably declared by the contnrs headers included
// above -- verify there).  The directives are scoped to namespace REParse,
// but they still apply to every file that includes this header.
using namespace Lists;
using namespace Stacks;
using namespace Trees;

// MachineCore: the data structure built from one pattern string.
//
// From the declarations below, a MachineCore owns a pointer to an entry State
// (`start`) and a trie of Tag records (`tagTrie`).  States are connected by
// State::Transfer records.  The public surface is limited to construction,
// create()/destroy() and, in debug builds, the view*() helpers; everything
// else is private and reached by Parser (and by Machine when
// __REmachine_as_Reference__ is defined) through friendship.
//
// NOTE(review): this header only declares the members; all behavioural
// remarks marked "presumably" must be confirmed against the implementation.
class MachineCore
{
private:
#ifdef __REmachine_as_Reference__
    friend class Machine;
#endif
    friend class Parser;
//  type definition in "MachineCore"

    // Pair of integer bounds; the constructor defaults are left = 0 and
    // right = -1.
    // NOTE(review): presumably the span recorded for a tagged sub-expression
    // (compare Parser::getShortcut's left/right outputs) -- confirm.
    struct Tag  // MachineCore::Tag
    {
        int left, right;

        Tag(int left = 0, int right = -1);
        void reset();
    };

    // Node type of the trie that stores Tag values.
    typedef TrieNode<Tag>   TagNode;

    // Trie of Tag values with its own constructor and an explicit destroy().
    // MachineCore and Parser are friends so they can reach the trie internals.
    struct TagTrie : public Trie<Tag>   // MachineCore::TagTrie
    {
        friend class MachineCore;
        friend class Parser;
        
        TagTrie();
        void destroy();
    };

    // Sequence of characters; used both for literal runs and character sets
    // (see Transfer::charList and Element::charset).
    typedef ContiguousList<char> CharList;

    // One node of the machine: a list of outgoing transfers plus a mark.
    struct State
    // MachineCore::State
    {
        // A [low, high] pair of integers; both default to 0.
        // NOTE(review): presumably an allowed repetition-count interval --
        // confirm whether the bounds are inclusive.
        struct CountRange
        // MachineCore::State::CountRange
        {
            int low, high;

            CountRange(int low = 0, int high = 0);
        };

        // A list of count ranges.
        typedef ContiguousList<CountRange> Countset;

        // Payload shared through Counter: a Countset plus an integer
        // userCount.  Can be default-constructed or built from a Countset.
        struct CounterData
        // MachineCore::State::CounterData
        {
            Countset countset;
            int userCount;

            CounterData();
            CounterData(const Countset &value);
        };

        // Handle to a CounterData, implemented on top of
        // Reference<CounterData> (presumably a shared/ref-counted handle --
        // see contnrs\ref.h).
        struct Counter : public Reference<CounterData>
        // MachineCore::State::Counter
        {
            typedef Reference<CounterData>  __Base;
            Counter();
            Counter(const Countset &value);
            // Returns the referenced CounterData by non-const reference.
            CounterData &retrieve();
            // Const variant: writes into `counterData` and reports a bool.
            // NOTE(review): presumably false means "no data attached" --
            // confirm.
            bool retrieve(CounterData &counterData) const;
        };

        // One outgoing edge of a State.
        struct Transfer
        // MachineCore::State::Transfer
        {
            // Kind of edge.  Parser declares one match*() routine per value
            // (matchSerial, matchInclusive, matchExclusive, matchShortcut).
            enum Style {Serial, Inclusive, Exclusive, Shortcut};
            Tag *tag;           // tag associated with the edge (non-owning? -- confirm)
            CharList charList;  // characters the edge is tested against

            Style style;
            Counter counter;    // counter handle carried by this edge
            State *outlet;      // presumably the state this edge leads to

            Transfer();
            Transfer(Style style);
            Transfer(Tag *tag, State *outlet);
            Transfer(Style style, const CharList &charList,
                    State *outlet = NULL);
        };

        // List of outgoing edges, stored by value.
        typedef ContiguousList<Transfer> Transfers; 

        // Per-state annotation: a byte of flag bits plus a Tag pointer.
        // Parser reads and writes the flag through get/setBasicFlag and
        // get/setEventFlag (bit layout is defined in the implementation).
        struct Mark
        // MachineCore::State::Mark
        {
            typedef unsigned char Flag;
            Flag flag;
            Tag *tag;

            Mark();
        };

        Transfers transfers;
        Mark mark;

        State();
        ~State();
    };  // struct State

    // Construction-time helper pairing an entry point (Begin) with an exit
    // point (End) of a partially built fragment of the state graph.
    struct Bridge
    //  MachineCore::Bridge
    {
        // Entry side of a fragment.
        struct Begin
        //  MachineCore::Bridge::Begin
        {
            State *state;
            bool multioutlet;   // multioutlet flag, set by specific programs
            State::Counter precounter;

            Begin();
            Begin(State *state);

            // Re-initialise this Begin around another state.
            void rebuild(State *state);
            void rebuild(State *state, bool multioutlet);
            bool marked();
        };

        // Exit side of a fragment; `free` defaults to true.
        // NOTE(review): the meaning of `free` is not visible here -- confirm.
        struct End
        //  MachineCore::Bridge::End
        {
            State *state;
            bool free;

            End(State *state = NULL, bool free = true);
            
            void rebuild(State *state = NULL, bool free = true);
        };

        Begin begin;
        End end;

        Bridge();
        Bridge(const Begin &begin, const End &end);
        ~Bridge();

        void rebuild(const Begin &begin, const End &end);
        // Defaults: a Serial transfer with an empty character list.
        // Presumably connects begin.state to end.state -- confirm.
        void link(State::Transfer::Style style = State::Transfer::Serial,
                const CharList &charList = CharList());
        void shortcut(Tag *tag);
    };  // struct Bridge

    // One lexical element read from the pattern: a type discriminator plus an
    // anonymous union holding the payload.
    // NOTE(review): which union member is active for which Type is decided in
    // the implementation; the user-declared copy constructor, destructor,
    // assignment, copyConstruct() and reset() suggest the pointer members are
    // owned -- confirm before copying Elements around.
    struct Element
    //  MachineCore::Element
    {
        enum Type {Null, Character, Exclusive, Inclusive, AnyChar, Shortcut,
                Count, Plus, AnyTimes} type;

        union
        {
            char ch;
            State::Countset *countset;
            TagNode *node;
            CharList *charset;
        };

        Element();
        Element(const Element &copy);
        ~Element();
        Element &operator =(const Element &copy);
        void copyConstruct(const Element &copy);
        void reset();
    };

    // Description of how a sub-expression repeats: a style plus a Countset.
    // One createTopo*() routine exists per Style value.
    struct Topo
    //  MachineCore::Topo
    {
        enum Style {Normal, Count, Any, Plus};

        Style style;
        State::Countset countset;

        // Note: the default argument is Count, not Normal.
        Topo(Style style = Count);
    };

    // List of State pointers; passed to destroyFrom() and the debug viewers
    // (presumably a visited list for graph traversal -- confirm).
    typedef ContiguousList<State *> ClearList;

    // Plain record of an int and a bool; not used by any declaration in this
    // header (presumably used in the implementation file).
    struct EndInfo
    {
        int i;
        bool pure;
    };

//  private member variable(s)
    State *start;       // entry state of the machine
    TagTrie tagTrie;    // trie of Tag records

//  private member function(s)
    //  parsing elements
    //  Convention shared by the routines below: `expression`/`len` are the
    //  pattern buffer and its length, and `pos` is taken by non-const
    //  reference (presumably the read offset, advanced by the callee).
    bool isDigit(char c);
    int getNumber(const char *expression, int len, int &pos);
    int getHex(const char *expression, int len, int &pos);
    char getChar(const char *expression, int len, int &pos);
    Element::Type getCharset(CharList *charset, const char *expression,
            int len, int &pos);
    void skipSpace(const char *expression, int len, int &pos);
    void goCountRangeEnd(const char *expression, int len, int &pos);
    State::CountRange getCountRange(const char *expression, int len,
            int &pos);
    void getCountset(State::Countset *countset, const char *expression,
            int len, int &pos);
    TagNode *getShortcut(const char *expression, int len, int &pos,
            TagNode *node);
    void getElement(Element &elem, const char *expression, int len,
            int &pos, TagNode *node);

    //  creating pure structures
    //  The first group takes already-parsed inputs (character list or tag
    //  node, optional countset) and works on a Bridge::Begin/End pair passed
    //  by reference; there is one routine per combination of operand kind
    //  (Serial / Charset / Shortcut) and repetition (Only / Count / AnyTimes /
    //  Plus).  NOTE(review): the role of `finished` is not visible here.
    void createPureSerial(const CharList &charList, Bridge::Begin &begin,
            Bridge::End &end, bool finished);
    void createPureCharsetOnly(const CharList &charset,
            State::Transfer::Style style, Bridge::Begin &begin,
            Bridge::End &end, bool finished);
    void createPureCharsetCount(const CharList &charList,
            State::Transfer::Style style, const State::Countset &countset,
            Bridge::Begin &begin, Bridge::End &end, bool finished);
    void createPureCharsetAnyTimes(const CharList &charset,
            State::Transfer::Style style, Bridge::Begin &begin,
            Bridge::End &end, bool finished);
    void createPureCharsetPlus(const CharList &charset,
            State::Transfer::Style style, Bridge::Begin &begin,
            Bridge::End &end, bool finished);
    void createPureShortcutOnly(TagNode *tagNode, Bridge::Begin &begin,
            Bridge::End &end, bool finished);
    void createPureShortcutCount(TagNode *tagNode,
            const State::Countset &countset, Bridge::Begin &begin,
            Bridge::End &end, bool finished);
    void createPureShortcutAnyTimes(TagNode *tagNode,
            Bridge::Begin &begin, Bridge::End &end, bool finished);
    void createPureShortcutPlus(TagNode *tagNode, Bridge::Begin &begin,
            Bridge::End &end, bool finished);
    //  The second group reads from the pattern text itself, starting from a
    //  current Element, and returns a bool (meaning defined in the .cpp).
    bool createPureChar(Element &elem, const char *expression, int len,
            int &pos, Bridge::Begin &begin, Bridge::End &end, TagNode *node);
    bool createPureCharChar(Element &elem, CharList &charList,
            const char *expression, int len, int &pos, Bridge::Begin &begin,
            Bridge::End &end, TagNode *node);
    bool createPureAnyChar(Element &elem, const char *expression,
            int len, int &pos, Bridge::Begin &begin, Bridge::End &end,
            TagNode *node);
    bool createPureCharset(Element &elem, const char *expression,
            int len, int &pos, Bridge::Begin &begin, Bridge::End &end,
            TagNode *node);
    bool createPureShortcut(Element &elem, const char *expression,
            int len, int &pos, Bridge::Begin &begin, Bridge::End &end,
            TagNode *node);
    bool createPureTrigger(Element &elem, const char *expression,
            int len, int &pos, Bridge::Begin &begin, Bridge::End &end,
            TagNode *node);
    void createPure(const char *expression, int len, Bridge::Begin &begin,
            Bridge::End &end, TagNode *node);

    //  creating topo structures
    //  One routine per Topo::Style, a dispatcher taking a Topo, and
    //  parseTopo(), which returns a Topo read from the pattern text.
    void createTopoNormal(const char *expression, int len,
            Bridge::Begin &begin, TagNode *node);
    void createTopoCount(const State::Countset &topoCountset,
            const char *expression, int len, Bridge::Begin &begin,
            TagNode *node);
    void createTopoAny(const char *expression, int len,
            Bridge::Begin &begin, TagNode *node);
    void createTopoPlus(const char *expression, int len,
            Bridge::Begin &begin, TagNode *node);
    void createTopo(const Topo &topo, const char *expression, int len,
            Bridge::Begin &begin, TagNode *node);
    Topo parseTopo(const char *expression, int len, int &pos);

    //  macro creating
    void clearTags(TagNode *node, int branchCount);
    void createSerial(const char *expression, int len, const Bridge &bridge,
            TagNode *node, int branchCount);
    void makeNode(TagNode *node, int pos, int left = 0, int right = 0);
    void makeMark(Bridge &bridge, TagNode *node);
    void createParallel(const char *expression, int len,
                             Bridge &bridge, TagNode *node);

    //  Teardown helper working from `state` with a caller-supplied list of
    //  State pointers (presumably the states already handled -- confirm).
    void destroyFrom(State *state, ClearList &list);

public:
    MachineCore();
    MachineCore(const char *expression, int len);
    ~MachineCore();

    //  Build the machine from `len` characters of `expression` / release it.
    //  NOTE(review): whether create() on an already-built machine destroys
    //  the old one first is not visible here -- confirm.
    void create(const char *expression, int len);
    void destroy();

#ifdef __REparse_debug__
    //  Debug-only inspection helpers (compiled only with __REparse_debug__).
    bool isEnd(State *state) const;
    void viewTransfer(State::Transfer &transfer) const;
    void viewFromState(State *state, ClearList &list) const;
    void viewTree(TagNode *node) const;
    void view() const;
#endif

};  // class MachineCore

// Machine is defined in one of two ways, selected by
// __REmachine_as_Reference__:
//   * defined:   a class derived (protected) from Reference<MachineCore>,
//                exposing the core through operator->;
//   * undefined: a plain typedef of MachineCore.
#ifdef __REmachine_as_Reference__

// Handle to a MachineCore built on Reference<MachineCore> (presumably a
// shared, reference-counted handle -- see contnrs\ref.h).
// NOTE(review): a destructor and a copy-assignment operator are declared but
// no copy constructor; copies therefore go through the implicitly generated
// one.  Confirm that Reference<MachineCore>'s copy constructor is sufficient.
class Machine : protected Reference<MachineCore>
{
private:
    // Short aliases; the MachineCore ones name private nested types, which is
    // permitted because MachineCore declares Machine a friend.
    typedef Reference<MachineCore>          __Base;
    typedef ReferencedObject<MachineCore>   __Refd;
    typedef MachineCore::State::Transfer    Transfer;
    typedef MachineCore::State              State;
    typedef MachineCore::TagNode            TagNode;
    typedef MachineCore::ClearList          ClearList;
public:
    Machine();
    Machine(const char *expression, int len);
    ~Machine();
    Machine &operator =(const Machine &copy);

    // Access to the underlying MachineCore.
    MachineCore *operator ->();

    // Same signatures as MachineCore::create()/destroy().
    void create(const char *expression, int len);
    void destroy();
};  // class Machine

#else
typedef MachineCore Machine;
#endif  // #ifdef __REmachine_as_Reference__

// __machine_member__(x): spells an access to member `x` of the machine used
// by Parser -- `machine.x` in the Reference build, `machine->x` when Parser
// holds a Machine pointer.
// Deliberately written without the `##` operator: pasting `.` or `->` onto an
// identifier does not produce a single valid preprocessing token, which is
// undefined behaviour in standard C++ and rejected by GCC/Clang.  Plain
// adjacency expands to exactly the same token sequence.
#ifdef __REmachine_as_Reference__
#define __machine_member__(x)   machine.x
#else
#define __machine_member__(x)   machine->x
#endif

// Parser: runs a subject string against a machine.
//
// In the Reference build (__REmachine_as_Reference__ defined) Parser derives
// (protected) from Reference<MachineCore> and can create, share and release
// its machine itself; otherwise it stores a Machine pointer supplied through
// setMachine().  The public entry points are parse() and getShortcut().
//
// NOTE(review): this header only declares the members; remarks marked
// "presumably" must be confirmed against the implementation file.
class Parser
#ifdef __REmachine_as_Reference__
: protected Reference<MachineCore>
#endif
{
private:
    // Result codes returned by the attempt*()/reattempt() routines.
    enum StateRes {Matched, Unmatched, Success, Failure};
    // Counter classification returned by getCounterStat(); Noncount is 0.
    enum CounterStat {Noncount = 0, Inside, Outside, Max};

    // Short aliases; the MachineCore ones name private nested types, which is
    // permitted because MachineCore declares Parser a friend.
    typedef MachineCore::State              State;
    typedef unsigned char               MarkFlag;
    typedef MachineCore::State::Transfer    Transfer;
    typedef ContiguousList<Transfer *>  TransferList;
    typedef MachineCore::TagNode            TagNode;
    typedef MachineCore::ClearList          ClearList;

#ifdef __REmachine_as_Reference__
    typedef Reference<MachineCore>          __Base;
    typedef ReferencedObject<MachineCore>   __Refd;
#endif

    struct Track;

    // Record that can be pushed into / popped out of a Track.
    // The anonymous union overlays two views of the same storage:
    // {pos, index, pace} and {left, right}; which view applies to which
    // Style is decided in the implementation.
    // NOTE(review): anonymous structs inside a union are a compiler
    // extension, not standard C++.
    struct Packet
    {
        enum Style{Noncount, Inside, Outside, Reserved, PureCountLoop,
                NoncountLoop, NoncountLoopVisited, CountLoop, Single};

        Style style;
        State *state;
        int count;
        union
        {
            struct
            {
                int pos;
                int index;
                int pace;
            };
            struct
            {
                int left;
                int right;
            };
        };

        void pushInto(Track &track);
        void popOutof(Track &track);
    };

    // Stack of ints that Packets are serialised into (see Packet::pushInto
    // and Packet::popOutof); presumably the backtracking history.
    struct Track : public ContiguousStack<int>
    {
        void freeEvents();
        void clear();
    };

#ifndef __REmachine_as_Reference__
    // Machine supplied through setMachine(); only present in the
    // non-Reference build.
    Machine *machine;
#endif

    //  parsing data
    Track track;
    int transferNum;
    CounterStat counterStat;
    State *current;
    ClearList pioneers;             // list of State pointers, queried by isPioneer()
    TransferList forbiddenList;     // list of Transfer pointers, queried by isForbidden()

    //  object string
    char *str;      // subject buffer handed to parse()
    int len, pos;   // its length and (presumably) the current read offset

    //  Per-state queries and accessors.
    bool isPureCountLoop(State *state) const;
    bool counterAssigned(State *state) const;
    int getUserCount(State *state) const;
    void setUserCount(State *state, int value);
    MarkFlag getBasicFlag(State *state) const;
    void setBasicFlag(State *state, MarkFlag value);
    bool getEventFlag(State *state) const;
    void setEventFlag(State *state, bool value);

    //  Transfer matching: one routine per Transfer::Style plus dispatchers.
    //  All return int (meaning defined in the implementation).
    bool isForbidden(Transfer *transfer) const;
    bool isPioneer(State *state) const;
    int matchSerial(Transfer &transfer);
    int matchInclusive(const Transfer &transfer) const;
    int matchExclusive(const Transfer &transfer) const;
    int matchShortcut(const Transfer &transfer) const;
    int matchAdvance(Transfer &transfer);
    int match(Transfer &transfer);

    //  State traversal and saving/restoring of Packets.
    CounterStat getCounterStat(State *state) const;
    bool enter(Packet &packet);
    void searchTransfer(int &index, int &pace);
    void leave(int index);
    void transfer(int index, int pace);
    void saveAttempt(int index, int pace);
    void saveExtra(Packet &packet);
    bool restore(Packet &packet);

    //  Single matching steps; each reports a StateRes.
    StateRes attemptNormal();
    StateRes attemptMax();
    StateRes attempt();
    StateRes reattempt();

    bool process();
    void cleanNode(TagNode *node);

public:

    Parser();
    ~Parser();
#ifdef __REmachine_as_Reference__
    //  Reference build: the parser manages its machine directly.
    Parser(const char *expression, int len);
    Parser &operator =(const Parser &copy);
    Parser &operator =(const Machine &m);

    void createMachine(const char *expression, int len);
    void destroyMachine();
    void release();
#else
    //  Pointer build: the caller supplies the machine.
    //  NOTE(review): presumably non-owning -- confirm who deletes it.
    void setMachine(Machine *machine);
    Machine *getMachine();
#endif

    //  Run the machine over `len` characters of `str`; returns an int whose
    //  meaning is defined in the implementation.
    int parse(char *str, int len);
    //  Looks up the shortcut named by `len` characters of `expression`; on a
    //  true result `left`/`right` are presumably set to the bounds recorded
    //  for it by the last parse() -- confirm.
    bool getShortcut(char *expression, int len, int &left, int &right);

#ifdef __REparse_debug__
    //  Debug-only dump of the machine (compiled only with __REparse_debug__).
    void viewMachine() const;
#endif
};

}   // namespace REParse

#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -