⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 reparse.cpp

📁 功能比较强的正则表达式分析器
💻 CPP
📖 第 1 页 / 共 4 页
字号:
//---------------------------------------------------------------------------


#pragma hdrstop

#include <compiler.h>

#ifdef __REparse_debug__
#include <iostream.h>
#include <assert.h>
#endif

//---------------------------------------------------------------------------

#pragma package(smart_init)

#ifdef compilerH
using namespace Compiler::REParse;
#else
using namespace REParse;
#endif

//  Small unsigned type that holds the mark flag values below.
typedef unsigned char MarkFlag;

//  Stored in CountRange::high by getCountRange() when a range is written with
//  '-' but no upper bound follows.  The type is spelled out explicitly:
//  implicit int is not valid standard C++.
const int LogicalInfinite           = -1;

//  Mark flag values.  Left | Right == Both.
//  NOTE(review): the exact meaning of each flag is defined by code outside
//  this part of the file -- confirm before relying on the names alone.
const MarkFlag MarkFlagNull         = 0;
const MarkFlag MarkFlagLeft         = 1;
const MarkFlag MarkFlagRight        = 2;
const MarkFlag MarkFlagBoth         = 3;
const MarkFlag MarkFlagBothVisited  = 4;
const MarkFlag MarkFlagBasicMask    = 0x07;
const MarkFlag MarkFlagEventMask    = 0x10;

//  MachineCore::Tag

//  Creates a tag with the given left and right values.
MachineCore::Tag::Tag(int left, int right)
        : left(left), right(right)
{
}

//  Restores the tag to left == 0, right == -1.
//  NOTE(review): right < left presumably denotes "nothing captured" -- confirm.
inline void MachineCore::Tag::reset()
{
    right = -1;
    left = 0;
}

//  MachineCore::State::CountRange

//  Creates a count range with the given lower and upper bounds.
MachineCore::State::CountRange::CountRange(int low, int high)
        : low(low), high(high)
{
}

//  MachineCore::TagTrie

//  Creates the trie with a freshly allocated root node.
MachineCore::TagTrie::TagTrie()
        : root(new TagNode)
{
}

//  Calls clear() on every non-null branch of the root, then empties the
//  root's branch list.  The root node itself is kept.
void MachineCore::TagTrie::destroy()
{
    // The count is sampled once, before any branch is cleared.
    const int total = root->branches.getCount();
    int index = 0;
    while(index < total)
    {
        if(root->branches[index])
        {
            clear(root->branches[index]);
        }
        ++index;
    }
    root->branches.clear();
}

//  MachineCore::State::CounterData

//  Creates counter data with a zero user count; countset is left
//  default-constructed.
MachineCore::State::CounterData::CounterData()
        : userCount(0)
{
}

//  Creates counter data holding a copy of `value` and a zero user count.
MachineCore::State::CounterData::CounterData(const Countset &value)
        : userCount(0)
{
    // Assigned (not copy-constructed) to keep the original call sequence.
    countset = value;
}

//  MachineCore::State::Counter

//  Default constructor; does nothing beyond default-constructing the base.
MachineCore::State::Counter::Counter()
{
}

//  Creates a counter by passing a CounterData built from `value` to the
//  Reference<CounterData> base.
MachineCore::State::Counter::Counter(const Countset &value)
        : Reference<CounterData>(CounterData(value))
{
}

//  Returns a reference to the counter data, as supplied by the base class's
//  __retrieve().
MachineCore::State::CounterData &MachineCore::State::Counter::retrieve()
{
    CounterData &data = __Base::__retrieve();
    return data;
}

//  Forwards to the base class's retrieve(), which fills `counterData`, and
//  returns that call's result.
bool MachineCore::State::Counter::retrieve(
        MachineCore::State::CounterData &counterData) const
{
    const bool result = __Base::retrieve(counterData);
    return result;
}

//  MachineCore::State::Transfer

//  Creates a transfer whose style is Serial; other members are not set here.
MachineCore::State::Transfer::Transfer()
        : style(Serial)
{
}

//  Creates a transfer with the given style; other members are not set here.
MachineCore::State::Transfer::Transfer(Style style)
        : style(style)
{
}

//  Creates a Shortcut-style transfer that refers to `tag` and leads to
//  `outlet`.
MachineCore::State::Transfer::Transfer(Tag *tag, State *outlet)
        : style(Transfer::Shortcut)
{
    this->outlet = outlet;
    this->tag = tag;
}

//  Creates a transfer of the given style that carries a copy of `charList`
//  and leads to `outlet`.
MachineCore::State::Transfer::Transfer(Style style,
        const CharList &charList, State *outlet)
        : style(style)
{
    this->outlet = outlet;
    // Assigned (not copy-constructed) to keep the original call sequence.
    this->charList = charList;
}

//  Creates an empty mark: no tag and a zero flag.
MachineCore::State::Mark::Mark()
        : tag(NULL), flag(0)
{
}

//  MachineCore::State

//  Default constructor; members are default-constructed, nothing else is done.
MachineCore::State::State()
{
}

//  Destructor; no explicit cleanup is performed in the body.
MachineCore::State::~State()
{
}

//  MachineCore::Bridge::Begin

//  Creates a begin point with no state and multioutlet switched off.
MachineCore::Bridge::Begin::Begin()
        : state(NULL), multioutlet(false)
{
}

//  Creates a begin point at `state` with multioutlet switched off.
MachineCore::Bridge::Begin::Begin(State *state)
        : state(state), multioutlet(false)
{
}

//  Re-targets the begin point at `state`, switches multioutlet off and
//  releases the precounter.
void MachineCore::Bridge::Begin::rebuild(State *state)
{
    // Same steps as the two-argument overload with multioutlet == false.
    rebuild(state, false);
}

//  Re-targets the begin point at `state`, stores the multioutlet setting and
//  releases the precounter.
void MachineCore::Bridge::Begin::rebuild(State *state, bool multioutlet)
{
    this->multioutlet = multioutlet;
    this->state = state;
    precounter.release();
}

//  Returns true when the begin state's mark has a tag attached.
//  `state` must not be NULL.
inline bool MachineCore::Bridge::Begin::marked()
{
    const bool hasTag = (state->mark.tag != NULL);
    return hasTag;
}

//  MachineCore::Bridge::End

//  Creates an end point at `state` with the given `free` setting.
MachineCore::Bridge::End::End(State *state, bool free)
        : state(state), free(free)
{
}

//  Re-targets the end point at `state` and stores the `free` setting.
inline void MachineCore::Bridge::End::rebuild(State *state, bool free)
{
    this->free = free;
    this->state = state;
}

//  MachineCore::Bridge

//  Default constructor; begin and end are default-constructed.
MachineCore::Bridge::Bridge()
{
}

//  Creates a bridge holding copies of the given begin and end points.
MachineCore::Bridge::Bridge(const Begin &begin, const End &end)
{
    // Same assignments rebuild() performs, written out in place.
    this->begin = begin;
    this->end = end;
}

inline void MachineCore::Bridge::rebuild(const Begin &begin, const End &end)
{
    this->begin = begin;
    this->end = end;
}

//  Destructor; no explicit cleanup is performed in the body.
MachineCore::Bridge::~Bridge()
{
}

//  Appends to the begin state a transfer of the given style that carries
//  `charList`, leads to the end state and shares the begin point's precounter.
inline void MachineCore::Bridge::link(State::Transfer::Style style,
        const CharList &charList)
{
    State *target = end.state;
    State::Transfer edge(style, charList, target);
    edge.counter = begin.precounter;
    begin.state->transfers.append(edge);
}

//  Appends to the begin state a Shortcut transfer that refers to `tag`, leads
//  to the end state and shares the begin point's precounter.
inline void MachineCore::Bridge::shortcut(Tag *tag)
{
    State *target = end.state;
    State::Transfer edge(tag, target);
    edge.counter = begin.precounter;
    begin.state->transfers.append(edge);
}

//  MachineCore::Element

//  Creates an element of type Null, which owns no payload.
MachineCore::Element::Element()
        : type(Null)
{
}

//  Copy constructor.  copyConstruct() sets `type` and duplicates any owned
//  charset/countset, so this element never shares heap payload with `copy`.
MachineCore::Element::Element(const Element &copy)
{
    copyConstruct(copy);
}

//  Destructor.  reset() deletes the charset or countset owned for the
//  current type, if any.
MachineCore::Element::~Element()
{
    reset();
}

//  Copy assignment.
//  When both elements have the same type, the payload is copied in place and
//  any owned charset/countset object is reused.  Otherwise the current payload
//  is released and rebuilt from `copy`.
//  Returns *this.
MachineCore::Element &MachineCore::Element::operator =(const Element &copy)
{
    if(this == &copy)
    {
        return *this;
    }
    if(type == copy.type)
    {
        // Each case must end with break: the payload members are only valid
        // for their own type, so falling through would dereference pointers
        // that do not belong to this element type.
        switch(type)
        {
        case Character:
            ch = copy.ch;
            break;
        case Exclusive: case Inclusive:
            *charset = *copy.charset;
            break;
        case Shortcut:
            node = copy.node;
            break;
        case Count:
            *countset = *copy.countset;
            break;
        default:
            // Remaining types carry no payload.
            break;
        }
    } else {
        reset();
        copyConstruct(copy);
    }
    return *this;
}

//  Makes this element a copy of `copy`, assuming it currently owns nothing.
//  The type is taken over first; charsets and countsets are duplicated into
//  newly allocated objects, while characters and shortcut nodes are copied by
//  value.  Other types carry no payload.
void MachineCore::Element::copyConstruct(const Element &copy)
{
    type = copy.type;
    if(type == Character)
    {
        ch = copy.ch;
    }
    else if(type == Exclusive || type == Inclusive)
    {
        charset = new CharList;
        *charset = *copy.charset;
    }
    else if(type == Shortcut)
    {
        node = copy.node;
    }
    else if(type == Count)
    {
        countset = new State::Countset;
        *countset = *copy.countset;
    }
}

//  Deletes the heap payload owned for the current type: the countset of a
//  Count element or the charset of an Exclusive/Inclusive element.
//  NOTE(review): `type` and the deleted pointer are left unchanged, so calling
//  reset() twice without assigning a new type in between deletes the same
//  object twice.
void MachineCore::Element::reset()
{
    if(type == Count)
    {
        delete countset;
    }
    else if(type == Exclusive || type == Inclusive)
    {
        delete charset;
    }
}

//  MachineCore::Topo::Topo

//  Creates a topo record with the given style.
MachineCore::Topo::Topo(Style style)
        : style(style)
{
}

//  MachineCore

//  Returns true when `c` is one of the characters '0'..'9'.
inline bool MachineCore::isDigit(char c)
{
    return '0' <= c && c <= '9';
}

//  Reads a run of decimal digits starting at `pos` and returns its value.
//  `pos` is left on the first character that is not a digit (or at `len`).
//  Returns 0 when no digit is present.
int MachineCore::getNumber(const char *expression, int len, int &pos)
{
    int value = 0;
    while(pos < len)
    {
        const char digit = expression[pos];
        if(digit < '0' || digit > '9')
        {
            break;
        }
        value = value * 10 + (digit - '0');
        ++pos;
    }
    return value;
}

//  Reads a run of hexadecimal digits (0-9, a-f, A-F) starting at `pos` and
//  returns its value.  `pos` is left on the first character that is not a hex
//  digit (or at `len`).  Returns 0 when no hex digit is present.
int MachineCore::getHex(const char *expression, int len, int &pos)
{
    int result = 0;
    for(; pos < len; pos++)
    {
        const char ch = expression[pos];
        int digit;
        if(ch >= 'A' && ch <= 'F')
            digit = ch - 'A' + 10;
        else if(ch >= 'a' && ch <= 'f')
            digit = ch - 'a' + 10;
        else if(ch >= '0' && ch <= '9')
            digit = ch - '0';
        else break;
        // Shift only after the character is known to be a hex digit, so a
        // terminating non-hex character does not multiply the result by 16.
        result = result * 16 + digit;
    }
    return result;
}

//  Reads one character at `pos`, decoding a backslash escape if present,
//  and advances `pos` past what was consumed.
//    \xHH / \XHH : the next two positions are read as hex digits
//    \n, \t      : newline, tab
//    \<other>    : that character itself
//  Returns 0 when the input ends before a complete character is available.
char MachineCore::getChar(const char *expression, int len, int &pos)
{
    if(pos >= len)
    {
        return 0;
    }
    const char first = expression[pos++];
    if(first != '\\')
    {
        return first;
    }

    // A backslash must be followed by at least one more character.
    if(pos >= len)
    {
        return 0;
    }
    const char escaped = expression[pos++];
    if(escaped == 'x' || escaped == 'X')
    {
        if(pos + 2 > len)
        {
            return 0;
        }
        // Restrict getHex to the two characters after the 'x'.
        return static_cast<char>(getHex(expression, pos + 2, pos));
    }
    if(escaped == 'n')
    {
        return '\n';
    }
    if(escaped == 't')
    {
        return '\t';
    }
    return escaped;
}

//  Parses the body of a bracketed character set.  `pos` must point just past
//  the opening '['.  The listed characters are appended to `charset`.
//    leading '^' : the set is Exclusive, otherwise Inclusive
//    a-b         : appends every character after `a` up to and including `b`
//                  (`a` itself was appended when it was read)
//    ']'         : ends the set; `pos` is left just past it
//  Characters are read through getChar(), so backslash escapes are honoured.
//  If the input ends before ']' is seen, parsing stops at `len` and the type
//  determined so far is returned.
//  NOTE(review): a '-' with no preceding character starts its range from
//  character code 0, because `ch1` is initialised to 0.
MachineCore::Element::Type MachineCore::getCharset(CharList *charset,
        const char *expression, int len, int &pos)
{
    char ch1 = 0, ch2;
    MachineCore::Element::Type result;
    if(pos < len && expression[pos] == '^')
    {
        pos++;
        result = Element::Exclusive;
    }
    else
    {
        result = Element::Inclusive;
    }
    // Bounded by len so an unterminated set cannot read past the expression
    // or loop forever.
    while(pos < len)
    {
        switch(expression[pos])
        {
        case ']':
            pos++;
            return result;
        case '-':
            ch2 = getChar(expression, len, ++pos);
            while(ch1 < ch2)
            {
                charset->append(++ch1);
            }
            // Without this break the next character, typically the closing
            // ']', would be consumed as a literal set member.
            break;
        default:
            ch1 = getChar(expression, len, pos);
            charset->append(ch1);
            break;
        }
    }
    return result;
}

//  Advances `pos` past any run of ' ' characters, stopping at `len`.
void MachineCore::skipSpace(const char *expression, int len, int &pos)
{
    while(pos < len && expression[pos] == ' ')
    {
        ++pos;
    }
}

//  Advances `pos` to the next ',' or '}' (or to `len` if neither occurs).
void MachineCore::goCountRangeEnd(const char *expression, int len, int &pos)
{
    while(pos < len)
    {
        const char current = expression[pos];
        if(current == ',' || current == '}')
        {
            break;
        }
        ++pos;
    }
}

//  Parses one entry of a count set: "n" or "n-m" or "n-".
//  A single number yields low == high == n.  "n-" without an upper bound
//  yields high == LogicalInfinite.  Spaces around the numbers are skipped.
//  On return `pos` is one past the character that ended the entry.
//  NOTE(review): expression[pos] is read without comparing pos to len; this
//  presumably relies on the expression being NUL-terminated -- confirm.
MachineCore::State::CountRange MachineCore::getCountRange(
        const char *expression, int len, int &pos)
{
    State::CountRange result;
    skipSpace(expression, len, pos);
    result.low = getNumber(expression, len, pos);
    result.high = result.low;
    skipSpace(expression, len, pos);

    // The character after the first number decides the form of the entry;
    // anything other than '-' simply ends it.
    const char separator = expression[pos++];
    if(separator == '-')
    {
        skipSpace(expression, len, pos);
        if(isDigit(expression[pos]))
        {
            result.high = getNumber(expression, len, pos);
        }
        else
        {
            result.high = LogicalInfinite;
        }
        // Skip to the terminating ',' or '}' and step over it.
        goCountRangeEnd(expression, len, pos);
        pos++;
    }
    return result;
}

//  Parses count ranges and appends them to `countset` until the character
//  just consumed is '}' or the input is exhausted.  `pos` must be at least 1,
//  since expression[pos - 1] is inspected.
void MachineCore::getCountset(State::Countset *countset, const char *expression,
        int len, int &pos)
{
    while(pos < len && expression[pos - 1] != '}')
    {
        countset->append(getCountRange(expression, len, pos));
    }
}

//  Resolves a shortcut path such as "1.2'" to a node of the tag trie.
//  A leading '_' starts the walk at the trie root; otherwise it starts at
//  `node`.  Each number is a 1-based branch index, '.' separates path steps,
//  and a quote character or the end of input finishes the path.
//  Returns the node reached, or NULL when an index is out of range or an
//  unexpected character follows a step (that character is left unread).
MachineCore::TagNode *MachineCore::getShortcut(const char *expression, int len,
        int &pos, TagNode *node)
{
    TagNode *current = node;
    if(expression[pos] == '_')
    {
        current = tagTrie.root;
        pos++;
    }
    for(;;)
    {
        // getNumber yields 0 when no digits are present, giving index -1,
        // which leaves the current node unchanged.
        const int index = getNumber(expression, len, pos) - 1;
        if(index >= 0)
        {
            if(index >= current->branches.getCount())
            {
                return NULL;
            }
            current = current->branches[index];
        }
        if(pos >= len)
        {
            return current;
        }
        const char next = expression[pos++];
        if(next == '\'')
        {
            return current;
        }
        if(next != '.')
        {
            pos--;
            return NULL;
        }
    }
}

//  Parses the next element of the expression at `pos` into `elem` and
//  advances `pos` past it.
//    \<digit> or \_ : Shortcut, resolved relative to `node` by getShortcut()
//    \<other>       : Character, decoded by getChar()
//    .              : AnyChar
//    [ ... ]        : Inclusive/Exclusive charset (elem owns the new CharList)
//    { ... }        : Count (elem owns the new Countset)
//    +              : Plus
//    *              : AnyTimes
//    anything else  : Character with that literal value
//  Any payload `elem` owned before the call is released first.
//  NOTE(review): expression[pos] is read without comparing pos to len; the
//  caller must guarantee pos < len.
void MachineCore::getElement(Element &elem, const char *expression, int len,
        int &pos, TagNode *node)
{
    elem.reset();
    switch(char ch = expression[pos++])
    {
    case '\\':
        ch = expression[pos];
        // Plain '_' is used here: "\_" is not a standard escape sequence.
        if((ch >= '0' && ch <= '9') || ch == '_')
        {
            elem.type = Element::Shortcut;
            elem.node = getShortcut(expression, len, pos, node);
        }
        else
        {
            elem.type = Element::Character;
            // Step back onto the backslash so getChar decodes the escape.
            elem.ch = getChar(expression, len, --pos);
        }
        break;
    case '.':
        elem.type = Element::AnyChar;
        break;
    case '[':
        elem.charset = new CharList;
        elem.type = getCharset(elem.charset, expression, len, pos);
        break;
    case '{':
        elem.type = Element::Count;
        elem.countset = new State::Countset;
        getCountset(elem.countset, expression, len, pos);
        break;
    case '+':
        elem.type = Element::Plus;
        break;
    case '*':
        elem.type = Element::AnyTimes;
        break;
    default:
        elem.type = Element::Character;
        elem.ch = ch;
        break;
    }
}

//  Adds a Serial transfer carrying `charList` that leaves the begin state.
//  When this is the final piece (`finished`) and the end point is not free,
//  the transfer leads to the existing end state and `begin` is left alone.
//  Otherwise a new State is allocated as the target and `begin` is rebuilt
//  to start from that new state.
void MachineCore::createPureSerial(const CharList &charList,
        Bridge::Begin &begin, Bridge::End &end, bool finished)
{
    const bool joinExistingEnd = finished && !end.free;
    if(!joinExistingEnd)
    {
        Bridge bridge(begin, Bridge::End(new State));
        bridge.link(State::Transfer::Serial, charList);
        begin.rebuild(bridge.end.state);
        return;
    }
    Bridge(begin, end.state).link(State::Transfer::Serial, charList);
}

//  Adds a transfer of the given style carrying `charset` that leaves the
//  begin state.
//  When this is the final piece (`finished`) and the end point is not free,
//  the transfer leads to the existing end state and `begin` is left alone.
//  Otherwise a new State is allocated as the target and `begin` is rebuilt
//  to start from that new state.
void MachineCore::createPureCharsetOnly(const CharList &charset,
        State::Transfer::Style style, Bridge::Begin &begin, Bridge::End &end,
        bool finished)
{
    const bool joinExistingEnd = finished && !end.free;
    if(!joinExistingEnd)
    {
        Bridge bridge(begin, Bridge::End(new State));
        bridge.link(style, charset);
        begin.rebuild(bridge.end.state);
        return;
    }
    Bridge(begin, end.state).link(style, charset);
}

void MachineCore::createPureCharsetCount(const CharList &charset,
        State::Transfer::Style style, const State::Countset &countset,
        Bridge::Begin &begin, Bridge::End &end, bool finished)
{

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -