⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 reparse.cpp

📁 功能比较强的正则表达式分析器
💻 CPP
📖 第 1 页 / 共 4 页
字号:
    Bridge bridge;
    if(begin.multioutlet || begin.marked())
    {
        bridge.rebuild(begin, Bridge::End(new State));
        bridge.link();
        begin.state = bridge.end.state;
        begin.multioutlet = false;
    }
    begin.precounter = countset;
    bridge.rebuild(begin, Bridge::End(begin.state));
    bridge.link(style, charset);
    begin.multioutlet = true;
    begin.precounter.release();
    if(finished && !end.free)
    {
        bridge.rebuild(begin, Bridge::End(end.state));
        bridge.link();
    }
}

void MachineCore::createPureCharsetAnyTimes(const CharList &charset,
        State::Transfer::Style style, Bridge::Begin &begin, Bridge::End &end,
        bool finished)
{
    Bridge bridge;
    if(begin.multioutlet || begin.marked())
    {
        bridge.rebuild(begin, Bridge::End(new State));
        bridge.link();
        begin.rebuild(bridge.end.state);
    }
    bridge.rebuild(begin, Bridge::End(begin.state));
    bridge.link(style, charset);
    begin.multioutlet = true;
    begin.precounter.release();
    if(finished && !end.free)
    {
        bridge.rebuild(begin, Bridge::End(end.state));
        bridge.link();
    }
}

void MachineCore::createPureCharsetPlus(const CharList &charset,
        State::Transfer::Style style, Bridge::Begin &begin, Bridge::End &end,
        bool finished)
{
    Bridge bridge;
    if(begin.multioutlet || begin.marked())
    {
        bridge.rebuild(begin, Bridge::End(new State));
        bridge.link();
        begin.rebuild(bridge.end.state);
    }
    bridge.rebuild(begin, Bridge::End(new State));
    bridge.link(style, charset);
    begin.rebuild(bridge.end.state, true);
    bridge.rebuild(begin, Bridge::End(bridge.begin.state));
    bridge.link();
    if(finished && !end.free)
    {
        bridge.rebuild(begin, Bridge::End(end.state));
        bridge.link();
    }
}

void MachineCore::createPureShortcutOnly(TagNode *tagNode, Bridge::Begin &begin,
        Bridge::End &end, bool finished)
/*  Adds a single shortcut transfer that refers to tagNode->entry.
    Post:   if #finished# is set and #end# is not free the shortcut leads to
            end.state and #begin# is left untouched; otherwise it leads to a
            new State and #begin# is rebuilt onto that State. */
{
    if(!finished || end.free)
    {
        //  No fixed target: create one and continue from it.
        Bridge hop(begin, Bridge::End(new State));
        hop.shortcut(&tagNode->entry);
        begin.rebuild(hop.end.state);
        return;
    }
    Bridge closing(begin, end.state);
    closing.shortcut(&tagNode->entry);
}

void MachineCore::createPureShortcutCount(TagNode *tagNode,
        const State::Countset &countset, Bridge::Begin &begin, Bridge::End &end,
        bool finished)
/*  Attaches a shortcut to tagNode->entry that leads from the current begin
    state back to itself while begin.precounter holds #countset#.
    Post:   begin.multioutlet is true and begin.precounter.release() has been
            called; when #finished# is set and #end# is not free, begin is
            additionally linked to end.state. */
{
    Bridge connector;
    const bool needsFreshState = begin.multioutlet || begin.marked();
    if(needsFreshState)
    {
        connector.rebuild(begin, Bridge::End(new State));
        connector.link();
        //  begin is patched field by field here rather than through
        //  begin.rebuild().
        begin.state = connector.end.state;
        begin.multioutlet = false;
    }
    //  The countset is installed before the bridge is rebuilt from begin.
    begin.precounter = countset;
    connector.rebuild(begin, Bridge::End(begin.state));
    connector.shortcut(&tagNode->entry);
    begin.multioutlet = true;
    begin.precounter.release();
    if(!finished || end.free)
    {
        return;
    }
    connector.rebuild(begin, Bridge::End(end.state));
    connector.link();
}

void MachineCore::createPureShortcutAnyTimes(TagNode *tagNode,
        Bridge::Begin &begin, Bridge::End &end, bool finished)
/*  Attaches a shortcut to tagNode->entry that leads from the current begin
    state back to that same state.
    Post:   begin.multioutlet is true and begin.precounter.release() has been
            called; when #finished# is set and #end# is not free, begin is
            additionally linked to end.state. */
{
    Bridge connector;
    const bool needsFreshState = begin.multioutlet || begin.marked();
    if(needsFreshState)
    {
        connector.rebuild(begin, Bridge::End(new State));
        connector.link();
        begin.rebuild(connector.end.state);
    }
    //  The loop itself: begin.state -> begin.state.
    connector.rebuild(begin, Bridge::End(begin.state));
    connector.shortcut(&tagNode->entry);
    begin.multioutlet = true;
    begin.precounter.release();
    if(!finished || end.free)
    {
        return;
    }
    connector.rebuild(begin, Bridge::End(end.state));
    connector.link();
}

void MachineCore::createPureShortcutPlus(TagNode *tagNode, Bridge::Begin &begin,
    Bridge::End &end, bool finished)
/*  Builds "one or more" of the shortcut to tagNode->entry: a shortcut to a
    new State followed by an unlabelled link from that State back to where the
    shortcut started.
    Post:   begin has been rebuilt onto the new State; when #finished# is set
            and #end# is not free, begin is additionally linked to
            end.state. */
{
    Bridge connector;
    const bool needsFreshState = begin.multioutlet || begin.marked();
    if(needsFreshState)
    {
        connector.rebuild(begin, Bridge::End(new State));
        connector.link();
        begin.rebuild(connector.end.state);
    }
    //  First occurrence: begin.state -> new State through the shortcut.
    connector.rebuild(begin, Bridge::End(new State));
    connector.shortcut(&tagNode->entry);
    //  Advance begin; connector.begin is read on the next line and presumably
    //  still names the state the shortcut started from.
    begin.rebuild(connector.end.state, true);
    //  Way back for further repetitions.
    connector.rebuild(begin, Bridge::End(connector.begin.state));
    connector.link();
    if(!finished || end.free)
    {
        return;
    }
    connector.rebuild(begin, Bridge::End(end.state));
    connector.link();
}

bool MachineCore::createPureChar(MachineCore::Element &elem,
        const char *expression, int len, int &pos, Bridge::Begin &begin,
        Bridge::End &end, TagNode *node)
/*  Handles a literal character held in #elem# (createPureTrigger calls this
    for Element::Character). The character is collected into a CharList and,
    unless the expression is exhausted, the next element is fetched into
    #elem# to decide how the character is emitted.
    Post:   returns true when #elem# now holds a fetched element that has not
            been turned into states yet, so the caller must dispatch on it
            again without calling getElement(); returns false otherwise. */
{
    CharList charList;
    charList.append(elem.ch);
    if(pos >= len)
    {
        //  Nothing follows: emit the single character as the final piece.
        createPureSerial(charList, begin, end, true);
        return false;
    }
    getElement(elem, expression, len, pos, node);
    //  #pos# was advanced by getElement(); "finished" tells the quantifier
    //  builders whether the quantifier was the last element.
    const bool finished = pos >= len;
    switch(elem.type)
    {
    case Element::Character:
        //  A run of literals: keep collecting.
        return createPureCharChar(elem, charList, expression, len, pos, begin,
                end, node);
    case Element::Count:
        createPureCharsetCount(charList, State::Transfer::Inclusive,
                *elem.countset, begin, end, finished);
        return false;
    case Element::AnyTimes:
        createPureCharsetAnyTimes(charList, State::Transfer::Inclusive, begin,
                end, finished);
        return false;
    case Element::Plus:
        createPureCharsetPlus(charList, State::Transfer::Inclusive, begin, end,
                finished);
        return false;
    default:
        //  The fetched element is something else: emit the character and
        //  hand the element back to the caller.
        createPureSerial(charList, begin, end, false);
        return true;
    }
}

bool MachineCore::createPureCharChar(MachineCore::Element &elem,
        CharList &charList, const char *expression, int len, int &pos,
        Bridge::Begin &begin, Bridge::End &end, TagNode *node)
/*  Continues a run of literal characters. On entry #charList# holds the
    characters collected so far and #elem# holds one more character that has
    not been appended yet.
    Post:   returns true when #elem# now holds a fetched element that has not
            been turned into states yet; returns false otherwise. */
{
    if(pos >= len)
    {
        //  The pending character is the last one of the expression.
        charList.append(elem.ch);
        createPureSerial(charList, begin, end, true);
        return false;
    }
    MachineCore::Element pending = elem;
    getElement(elem, expression, len, pos, node);
    switch(elem.type)
    {
    case Element::Character:
        //  Still inside the run: absorb the pending character and go on.
        charList.append(pending.ch);
        return createPureCharChar(elem, charList, expression, len, pos, begin,
                end, node);
    case Element::Count:
    case Element::AnyTimes:
    case Element::Plus:
        //  Quantifiers are handled below.
        break;
    default:
        charList.append(pending.ch);
        createPureSerial(charList, begin, end, false);
        return true;
    }
    //  A quantifier applies to the pending character only: flush the
    //  characters before it as a plain run, then restart the list with the
    //  pending character alone.
    createPureSerial(charList, begin, end, false);
    charList.clear();
    charList.append(pending.ch);
    const bool finished = pos >= len;
    if(elem.type == Element::Count)
    {
        createPureCharsetCount(charList, State::Transfer::Inclusive,
                *elem.countset, begin, end, finished);
    }
    else if(elem.type == Element::AnyTimes)
    {
        createPureCharsetAnyTimes(charList, State::Transfer::Inclusive, begin,
                end, finished);
    }
    else
    {
        createPureCharsetPlus(charList, State::Transfer::Inclusive, begin, end,
                finished);
    }
    return false;
}

bool MachineCore::createPureAnyChar(MachineCore::Element &elem,
        const char *expression, int len, int &pos, Bridge::Begin &begin,
        Bridge::End &end, TagNode *node)
/*  Handles an Element::AnyChar element. It is emitted as an Exclusive
    transfer over an empty CharList (presumably "every character except
    none"). Unless the expression is exhausted, the next element is fetched
    into #elem# to find out whether a quantifier follows.
    Post:   returns true when #elem# now holds a fetched element that has not
            been turned into states yet; returns false otherwise. */
{
    if(pos >= len)
    {
        createPureCharsetOnly(CharList(), State::Transfer::Exclusive, begin,
                end, true);
        return false;
    }
    getElement(elem, expression, len, pos, node);
    //  #pos# was advanced by getElement(); "finished" tells the quantifier
    //  builders whether the quantifier was the last element.
    const bool finished = pos >= len;
    switch(elem.type)
    {
    case Element::Count:
        createPureCharsetCount(CharList(), State::Transfer::Exclusive,
                *elem.countset, begin, end, finished);
        return false;
    case Element::AnyTimes:
        createPureCharsetAnyTimes(CharList(), State::Transfer::Exclusive,
                begin, end, finished);
        return false;
    case Element::Plus:
        createPureCharsetPlus(CharList(), State::Transfer::Exclusive, begin,
                end, finished);
        return false;
    default:
        //  No quantifier: emit a single occurrence and hand the fetched
        //  element back to the caller.
        createPureCharsetOnly(CharList(), State::Transfer::Exclusive, begin,
                end, false);
        return true;
    }
}

bool MachineCore::createPureCharset(MachineCore::Element &elem,
        const char *expression, int len, int &pos, Bridge::Begin &begin,
        Bridge::End &end, TagNode *node)
/*  Handles a charset element (Element::Inclusive or Element::Exclusive).
    Unless the expression is exhausted, the next element is fetched into
    #elem# to find out whether a quantifier follows.
    Post:   returns true when #elem# now holds a fetched element that has not
            been turned into states yet; returns false otherwise. */
{
    //  The transfer style mirrors the element type; it is fixed before
    //  getElement() overwrites #elem#.
    State::Transfer::Style style = State::Transfer::Exclusive;
    if(elem.type == Element::Inclusive)
    {
        style = State::Transfer::Inclusive;
    }
    if(pos >= len)
    {
        createPureCharsetOnly(*elem.charset, style, begin, end, true);
        return false;
    }
    //  Keep the charset element; #elem# is reused for the look-ahead.
    MachineCore::Element setElem = elem;
    getElement(elem, expression, len, pos, node);
    const bool finished = pos >= len;
    switch(elem.type)
    {
    case Element::Count:
        createPureCharsetCount(*setElem.charset, style, *elem.countset, begin,
                end, finished);
        return false;
    case Element::AnyTimes:
        createPureCharsetAnyTimes(*setElem.charset, style, begin, end,
                finished);
        return false;
    case Element::Plus:
        createPureCharsetPlus(*setElem.charset, style, begin, end, finished);
        return false;
    default:
        break;
    }
    //  No quantifier: a single occurrence, and the look-ahead element goes
    //  back to the caller.
    createPureCharsetOnly(*setElem.charset, style, begin, end, false);
    return true;
}

bool MachineCore::createPureShortcut(MachineCore::Element &elem,
        const char *expression, int len, int &pos, Bridge::Begin &begin,
        Bridge::End &end, TagNode *node)
/*  Handles an Element::Shortcut element, whose target is elem.node. Unless
    the expression is exhausted, the next element is fetched into #elem# to
    find out whether a quantifier follows.
    Post:   returns true when #elem# now holds a fetched element that has not
            been turned into states yet; returns false otherwise. */
{
    if(pos >= len)
    {
        createPureShortcutOnly(elem.node, begin, end, true);
        return false;
    }
    //  Keep the shortcut element; #elem# is reused for the look-ahead.
    MachineCore::Element shortcutElem = elem;
    getElement(elem, expression, len, pos, node);
    const bool finished = pos >= len;
    switch(elem.type)
    {
    case Element::Count:
        createPureShortcutCount(shortcutElem.node, *elem.countset, begin, end,
                finished);
        return false;
    case Element::AnyTimes:
        createPureShortcutAnyTimes(shortcutElem.node, begin, end, finished);
        return false;
    case Element::Plus:
        createPureShortcutPlus(shortcutElem.node, begin, end, finished);
        return false;
    default:
        break;
    }
    //  No quantifier: a single shortcut, and the look-ahead element goes back
    //  to the caller.
    createPureShortcutOnly(shortcutElem.node, begin, end, false);
    return true;
}

bool MachineCore::createPureTrigger(MachineCore::Element &elem,
        const char *expression, int len, int &pos, Bridge::Begin &begin,
        Bridge::End &end, TagNode *node)
/*  Dispatches on elem.type to the builder for that kind of element and
    relays the builder's result.
    Post:   for any element type without a dedicated builder an unlabelled
            link is emitted, #pos# is set to #len# and false is returned. */
{
    switch(elem.type)
    {
    case Element::Character:
        return createPureChar(elem, expression, len, pos, begin, end, node);
    case Element::AnyChar:
        return createPureAnyChar(elem, expression, len, pos, begin, end, node);
    case Element::Inclusive:
    case Element::Exclusive:
        return createPureCharset(elem, expression, len, pos, begin, end, node);
    case Element::Shortcut:
        return createPureShortcut(elem, expression, len, pos, begin, end, node);
    default:
        break;
    }
    //  Fallback for every other element type.
    //  NOTE(review): begin is rebuilt only when the end is fixed, not when a
    //  new State is created for a free end - confirm this asymmetry is
    //  intended.
    if(!end.free)
    {
        Bridge closing(begin, end.state);
        closing.link();
        begin.rebuild(closing.end.state);
    }
    else
    {
        Bridge(begin, Bridge::End(new State)).link();
    }
    //  Stops the caller's loop: nothing further is read from the expression.
    pos = len;
    return false;
}

void MachineCore::createPure(const char *expression, int len,
        Bridge::Begin &begin, Bridge::End &end, TagNode *node)
/*  Walks the first #len# characters of #expression# element by element and
    feeds each element to createPureTrigger(). */
{
    int pos = 0;
    MachineCore::Element elem;
    //  True while #elem# holds an element that createPureTrigger() fetched
    //  as look-ahead but did not consume.
    bool carried = false;
    for(;;)
    {
        if(!carried)
        {
            if(pos >= len)
            {
                break;
            }
            getElement(elem, expression, len, pos, node);
        }
        carried = createPureTrigger(elem, expression, len, pos, begin, end,
                node);
    }
}

void MachineCore::createTopoNormal(const char *expression, int len,
        Bridge::Begin &begin, TagNode *node)
/*  Pre:
    Post:   begin = (bridge.end.state, false, (NULL))   */
{
    Bridge bridge(begin, Bridge::End(new State));
    createParallel(expression, len, bridge, node);
    begin.state = bridge.end.state;
}

void MachineCore::createTopoCount(const State::Countset &topoCountset,
        const char *expression, int len, Bridge::Begin &begin, TagNode *node)
/*  Builds a counted sub-expression as a loop that starts and ends at the
    current begin state.
    Pre:
    Post:   begin.state is not reassigned, begin.multioutlet is true and
            begin.precounter.release() has been called. */
{
    //  The countset is installed before the bridge is built from begin.
    begin.precounter = topoCountset;
    Bridge loop(begin, begin.state);
    createParallel(expression, len, loop, node);
    begin.multioutlet = true;
    begin.precounter.release();
}

void MachineCore::createTopoAny(const char *expression, int len,
        Bridge::Begin &begin, TagNode *node)
/*  Pre:
    Post:   begin = (begin.state, true, (NULL))   */
{
    Bridge bridge(begin, begin.state);
    createParallel(expression, len, bridge, node);
    begin.multioutlet = true;
}

void MachineCore::createTopoPlus(const char *expression, int len,
        Bridge::Begin &begin, TagNode *node)
/*  Pre:
    Post:   begin = (bridge.end.state, true, (NULL))   */
{
    Bridge bridge(begin, Bridge::End(new State));
    createParallel(expression, len, bridge, node);
    begin.state = bridge.end.state;
    begin.multioutlet = true;
    bridge.rebuild(begin, Bridge::End(bridge.begin.state));
    bridge.link();
}

void MachineCore::createTopo(const Topo &topo, const char *expression,
    int len, Bridge::Begin &begin, TagNode *node)
/*  Selects the sub-expression builder that matches topo.style.
    Pre:    #len# is the length of the expression including the terminator
            after the last valid character.
    Post:   the matching createTopoXxx() has run; nothing happens for a style
            that is not listed below. */
{
    if(topo.style == Topo::Normal)
    {
        createTopoNormal(expression, len, begin, node);
    }
    else if(topo.style == Topo::Count)
    {
        createTopoCount(topo.countset, expression, len, begin, node);
    }
    else if(topo.style == Topo::Any)
    {
        createTopoAny(expression, len, begin, node);
    }
    else if(topo.style == Topo::Plus)
    {
        createTopoPlus(expression, len, begin, node);
    }
}

MachineCore::Topo MachineCore::parseTopo(const char *expression, int len,
        int &pos)
/*  Reads the quantifier, if any, that starts at expression[pos].
    Pre:    #expression# refers to (at least partly) valid
            sub-regular-expression and expression[#pos#] is readable.
    Post:   returns the topo info: '*' gives Topo::Any, '+' gives Topo::Plus,
            '{' gives Topo::Count with the countset filled by getCountset(),
            anything else gives Topo::Normal. #pos# is left after the
            quantifier character (getCountset() receives #pos# by reference
            and presumably advances it further), or unchanged for
            Topo::Normal. */
{
    Topo result;
    const char quantifier = expression[pos];
    ++pos;
    if(quantifier == '*')
    {
        result.style = Topo::Any;
    }
    else if(quantifier == '+')
    {
        result.style = Topo::Plus;
    }
    else if(quantifier == '{')
    {
        result.style = Topo::Count;
        getCountset(&result.countset, expression, len, pos);
    }
    else
    {
        //  Not a quantifier: give the character back.
        --pos;
        result.style = Topo::Normal;
    }
    return result;
}

void MachineCore::clearTags(TagNode *node, int branchCount)
/*  Resets the entry of the first #branchCount# branches of #node# to the
    range left = 0, right = -1. */
{
    int index = 0;
    while(index < branchCount)
    {
        TagNode *branch = node->branches[index];
        branch->entry.left = 0;
        branch->entry.right = -1;
        ++index;
    }
}

void MachineCore::createSerial(const char *expression, int len,
        const Bridge &bridge, TagNode *node, int branchCount)
{
    int i, sublen, pos = 0;
    Topo topo;
    Bridge::Begin begin = bridge.begin;
    for(i = 0; i < branchCount; i++)
    {
        if((sublen = node->branches[i]->entry.left - pos) > 0)
        {
            createPure(expression + pos, sublen, begin, Bridge::End(), node);
        }
        if(begin.marked() || begin.multioutlet)
        //  any precounter will be released here
        {
            Bridge temp(begin, Bridge::End(new State));
            temp.link();
            begin.rebuild(temp.end.state);
        }
        topo = parseTopo(expression, len,
                pos = node->branches[i]->entry.right + 1);
        createTopo(topo, expression + node->branches[i]->entry.left + 1,
                node->branches[i]->entry.right - node->branches[i]->entry.left,
                begin, node->branches[i]);
    }
    if((sublen = len - pos) > 0)
    {
        createPure(expression + pos, sublen, begin,
                Bridge::End(bridge.end.state, false), node);
    }
    else
    {
        Bridge temp(begin, bridge.end.state);
        temp.link();
    }
    clearTags(node, branchCount);
}

void MachineCore::makeMark(Bridge &bridge, TagNode *node)
/*  Marks the two ends of #bridge# with the tag of #node#. Nothing is marked
    when #node# is tagTrie.root.
    Post:   if both ends are the same state, that state carries
            MarkFlagBoth; otherwise the begin state carries MarkFlagLeft and
            the end state carries MarkFlagRight. */
{
    if(node == tagTrie.root)
    {
        return;
    }
    if(bridge.begin.state == bridge.end.state)
    {
        //  A single state serves as both ends.
        bridge.end.state->mark.tag = &node->entry;
        bridge.end.state->mark.flag = MarkFlagBoth;
        return;
    }
    bridge.begin.state->mark.tag = &node->entry;
    bridge.begin.state->mark.flag = MarkFlagLeft;
    bridge.end.state->mark.tag = &node->entry;
    bridge.end.state->mark.flag = MarkFlagRight;
}

void MachineCore::makeNode(TagNode *node, int pos, int left, int right)
/*  Stores Tag(#left#, #right#) in branch number #pos# of #node#. An existing
    branch is overwritten; if #pos# is not below the current branch count, a
    new TagNode is appended instead. */
{
    if(pos >= node->branches.getCount())
    {
        TagNode *fresh = new TagNode(Tag(left, right));
        node->branches.append(fresh);
        return;
    }
    node->branches[pos]->entry = Tag(left, right);
}

void MachineCore::createParallel(const char *expression, int len,
        Bridge &bridge, TagNode *node)
/*  Pre:    #len# is the length of the expression including the terminator after
            the last valid character
    Post: */
{
    int i, branchCount = 0;
    int posLeftBracket, posOld = 0;
    int backSlashCount = 0;
    int bracketDepth = 0;
    char ch;
    makeMark(bridge, node);
    for(i = 0; i < len; i++)
    {
        if((ch = expression[i]) == '\\')
        {
            backSlashCount++;
        }
        else
        {
            if(backSlashCount % 2 == 0)
            {
                switch(ch)
                {
                case '(':
                    if(bracketDepth++ == 0)
                    {
                        posLeftBracket = i;
                    }
                    break;
                case ')':
                    if(--bracketDepth == 0)
                    {
                        makeNode(node, branchCount++, posLeftBracket - posOld,
                                i - posOld);
                        break;
                    }
                    else if(bracketDepth > 0)
                    {
                        break;
                    }
                case '|': case '\0': // '|', '\0', ')' 
                    if(ch == ')' && bracketDepth < 0 || bracketDepth == 0)
                    {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -