⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 reparse.cpp

📁 功能比较强的正则表达式分析器
💻 CPP
📖 第 1 页 / 共 4 页
字号:
                        bridge.begin.multioutlet |= (ch == '|');
                        createSerial(expression + posOld, i - posOld,
                                bridge, node, branchCount);
                        branchCount = 0;
                        bracketDepth = 0;
                        posOld = i + 1;
                    }
                    break;
                }
            }
            backSlashCount = 0;
        }
    }
}

void MachineCore::destroyFrom(State *state, ClearList &list)
{
    int i, count = state->transfers.getCount();
    for(i = list.getCount(); --i >= 0;)
    {
        if(list[i] == state) return;
    }
    list.append(state);
    for(i = 0; i < count; i++)
    {
        if(state->transfers[i].outlet)
        {
            destroyFrom(state->transfers[i].outlet, list);
        }
    }
    delete state;
}

MachineCore::MachineCore()
{
    start = NULL;
}

MachineCore::MachineCore(const char *expression, int len)
{
    start = NULL;
    create(expression, len);
}

MachineCore::~MachineCore()
{
    if(start)
    {
        ClearList list;
        destroyFrom(start, list);
    }
}

void MachineCore::create(const char *expression, int len)
/*  Pre:    #expression# must be termintated in '\0',
            #len# is the length of the expression including the terminator,
            e.g. for expression "abc", in memory it should be like
            ('a','b','c','\0'), hence the length is 4.
    Post: */
{
    destroy();
    start = new State;
    Bridge bridge(start, NULL);
    createParallel(expression, len, bridge, tagTrie.root);
}

void MachineCore::destroy()
{
    if(start)
    {
        ClearList list;
        destroyFrom(start, list);
        start = NULL;
    }
    tagTrie.destroy();
}

////////////////////////////////////////////////////////////////////////////////

#ifdef __REmachine_as_Reference__
//  Machine

Machine::Machine()
{
}

Machine::Machine(const char *expression, int len)
{
    __Base::create();
    __retrieve().create(expression, len);
}

Machine::~Machine()
{
}

Machine &Machine::operator =(const Machine &copy)
{
    __Base::operator =(copy);
    return *this;
}

MachineCore *Machine::operator ->()
{
    if(!assigned())
    {
        return NULL;
    }
    return &__retrieve();
}

void Machine::create(const char *expression, int len)
{
    __Base::create();
    __retrieve().create(expression, len);
}

void Machine::destroy()
{
    if(assigned())
    {
        __retrieve().destroy();
    }
}

#endif  // #ifdef __REmachine_as_Reference__

////////////////////////////////////////////////////////////////////////////////

#ifdef __REparse_debug__
int stackDepth = 0, maxStackDepth = 0;
#define __push__(v) if(maxStackDepth<(stackDepth+=(v)))maxStackDepth=stackDepth
#define __pop__(v)  stackDepth-=(v)
#endif

//  Parser::Track

void Parser::Track::freeEvents()
{
    State *state;
    Packet::Style style;
    for(int i = depth - 1; i > 0 &&
            (style = (Packet::Style)items[i]) >= Packet::NoncountLoop;)
    {
        state = (State *)items[i - 1];
        state->mark.flag &= ~MarkFlagEventMask;
        switch(style)
        {
        case Packet::NoncountLoop: case Packet::NoncountLoopVisited:
            i -= 4;
            break;
        case Packet::CountLoop:
            i -= 5;
            break;
        case Packet::Single:
            i -= 3;
            break;
        }
    }
}

void Parser::Track::clear()
{
    freeEvents();
    ContiguousStack<int>::clear();
}

//  Parser::Packet

void Parser::Packet::pushInto(Track &tracks)
{
    switch(style)
    {
    case Noncount: case Inside: case Outside:
        tracks.push(pace);
        tracks.push(index);
        tracks.push(pos);
#ifdef __REparse_debug__
        __push__(5);
#endif
        break;
    case PureCountLoop:
        tracks.push(count);
        tracks.push(pace);
        tracks.push(index);
        tracks.push(pos);
#ifdef __REparse_debug__
        __push__(6);
#endif
        break;
    case NoncountLoop: case NoncountLoopVisited:
        tracks.push(right);
        tracks.push(left);
#ifdef __REparse_debug__
        __push__(4);
#endif
        break;
    case CountLoop:
        tracks.push(right);
        tracks.push(left);
        tracks.push(count);
#ifdef __REparse_debug__
        __push__(5);
#endif
        break;
    case Single:
        tracks.push(pos);
#ifdef __REparse_debug__
        __push__(3);
#endif
        break;
    }
    tracks.push((int)state);
    tracks.push((int)style);
}

void Parser::Packet::popOutof(Track &tracks)
{
    int value;
    tracks.pop(value);
    style = (Style)value;
    tracks.pop(value);
    state = (State *)value;
    switch(style)
    {
    case Noncount: case Inside: case Outside:
        tracks.pop(pos);
        tracks.pop(index);
        tracks.pop(pace);
#ifdef __REparse_debug__
        __pop__(5);
#endif
        break;
    case PureCountLoop:
        tracks.pop(pos);
        tracks.pop(index);
        tracks.pop(pace);
        tracks.pop(count);
#ifdef __REparse_debug__
        __pop__(6);
#endif
        break;
    case NoncountLoop: case NoncountLoopVisited:
        tracks.pop(left);
        tracks.pop(right);
#ifdef __REparse_debug__
        __pop__(4);
#endif
        break;
    case CountLoop:
        tracks.pop(count);
        tracks.pop(left);
        tracks.pop(right);
#ifdef __REparse_debug__
        __pop__(5);
#endif
        break;
    case Single:
        tracks.pop(pos);
#ifdef __REparse_debug__
        __pop__(3);
#endif
        break;
    }
}

//  Parser

inline bool Parser::isPureCountLoop(State *state) const
{
    return state->transfers[0].counter.assigned() &&
            state->transfers[0].outlet == state;
}

inline bool Parser::counterAssigned(State *state) const
{
    return state->transfers[0].counter.assigned();
}

inline int Parser::getUserCount(State *state) const
{
    Transfer &transfer = state->transfers[0];
    if(!transfer.counter.assigned())
    {
        return -1;
    }
    State::CounterData &counterData = transfer.counter.retrieve();
    return counterData.userCount;
}

inline void Parser::setUserCount(State *state, int value)
{
    Transfer &transfer = state->transfers[0];
    if(!transfer.counter.assigned())
    {
        return;
    }
    State::CounterData &counterData = transfer.counter.retrieve();
    counterData.userCount = value;
}

inline MarkFlag Parser::getBasicFlag(State *state) const
{
    return state->mark.flag & MarkFlagBasicMask;
}

inline void Parser::setBasicFlag(State *state, MarkFlag value)
{
    state->mark.flag |= MarkFlagBasicMask;
    state->mark.flag &= value;
}

inline bool Parser::getEventFlag(State *state) const
{
    return (state->mark.flag & MarkFlagEventMask) != 0;
}

inline void Parser::setEventFlag(State *state, bool value)
{
    if(value)
    {
        state->mark.flag |= MarkFlagEventMask;
    }
    else
    {
        state->mark.flag &= ~MarkFlagEventMask;
    }
}

bool Parser::isForbidden(Transfer *transfer) const
{
    int count = forbiddenList.getCount();
    for (int i = 0; i < count; i++)
    {
        if(transfer == forbiddenList[i])
        {
            return true;
        }
    }
    return false;
}

bool Parser::isPioneer(State *state) const
{
    if(state == current)
    {
        return true;
    }
    int count = pioneers.getCount();
    for (int i = 0; i < count; i++)
    {
        if(state == pioneers[i])
        {
            return true;
        }
    }
    return false;
}

int Parser::matchSerial(Transfer &transfer)
{
    int i, count = transfer.charList.getCount();
    if(count == 0)
    {
        if(isForbidden(&transfer))
        {
            return -1;
        }
#ifdef __REparse_optimization__
        if(transfer.outlet == NULL)
        {
            return 0;
        }
        if(isPioneer(transfer.outlet))
        {
            return -1;
        }
        pioneers.append(transfer.outlet);
        int nextNum = transfer.outlet->transfers.getCount();
        for(int j = 0; j < nextNum; j++)
        {
            if(matchAdvance(transfer.outlet->transfers[j]) >= 0)
            {
                return 0;
            }
        }
        return -1;
#endif
    }
    if(len - pos < count)
    {
        return -1;
    }
    for(i = 0; i < count; i++)
    {
        if (str[pos+i] != transfer.charList[i])
        {
            return -1;
        }
    }
    return count;
}

int Parser::matchInclusive(const Transfer &transfer) const
{
    int i, count = transfer.charList.getCount();
    if(pos >= len)
    {
        return -1;
    }
    for(i = 0; i < count; i++)
    {
        if (str[pos] == transfer.charList[i])
        {
            return 1;
        }
    }
    return -1;
}

int Parser::matchExclusive(const Transfer &transfer) const
{
    int i, count = transfer.charList.getCount();
    if(pos >= len)
    {
        return -1;
    }
    for(i = 0; i < count; i++)
    {
        if (str[pos] == transfer.charList[i])
        {
            return -1;
        }
    }
    return 1;
}

int Parser::matchShortcut(const Transfer &transfer) const
{
    if(transfer.tag == NULL || transfer.tag->right < 0
            || len - pos < transfer.tag->right - transfer.tag->left)
    {
        return -1;
    }
    int i;
    for(i = transfer.tag->left; i < transfer.tag->right; i++)
    {
        if(str[pos + i - transfer.tag->left] != str[i])
        {
            return -1;
        }
    }
    return i - transfer.tag->left;
}

int Parser::matchAdvance(Transfer &transfer)
{
    switch(transfer.style)
    {
    case Transfer::Serial:
        return matchSerial(transfer);
    case Transfer::Inclusive:
        return matchInclusive(transfer);
    case Transfer::Exclusive:
        return matchExclusive(transfer);
    default:
        return 0;
    }
}

int Parser::match(Transfer &transfer)
{
    int res;
    switch(transfer.style)
    {
    case Transfer::Serial:
        res = matchSerial(transfer);
        pioneers.clear();
        return res;
    case Transfer::Inclusive:
        return matchInclusive(transfer);
    case Transfer::Exclusive:
        return matchExclusive(transfer);
    case Transfer::Shortcut:
        return matchShortcut(transfer);
    default:
        return -1;
    }
}

Parser::CounterStat Parser::getCounterStat(State *state) const
{
    Transfer &transfer = state->transfers[0];
    if(!transfer.counter.assigned())
    {
        return Noncount;
    }
    State::CounterData &counterData = transfer.counter.retrieve();
    State::Countset &countset = counterData.countset;
    int num = countset.getCount(), count = counterData.userCount;
    int max = 0;
    bool inside = false;
    for(int i = 0; i < num; i++)
    {
        if(max < countset[i].high || countset[i].high == LogicalInfinite)
        {
            max = countset[i].high;
        }
        if(inside || count >= countset[i].low && (count <= countset[i].high ||
                countset[i].high == LogicalInfinite))
        {
            inside = true;
            if (count < max || max == LogicalInfinite)
            {
                return Inside;
            }
        }
    }
    if(count == max)
    {
        return Max;
    }
    return Outside;
}

bool Parser::enter(Packet &packet)
{
    bool result = ~getEventFlag(current);
    switch(MarkFlag flag = getBasicFlag(current))
    {
    case MarkFlagLeft:
        if(result)
        {
            packet.style = Parser::Packet::Single;
            packet.pos = current->mark.tag->left;
#ifdef __REparse_debug__
            cout << "% to push left parenthesis: " << packet.pos << endl;
#endif
        }
#ifdef __REparse_debug__
        cout << "% left parenthesis: " << pos << endl;
#endif
        current->mark.tag->left = pos;
        return result;
    case MarkFlagRight:
        if(result)
        {
            packet.style = Parser::Packet::Single;
            packet.pos = current->mark.tag->right;
#ifdef __REparse_debug__
            cout << "% to push right parenthesis: " << packet.pos << endl;
#endif
        }
#ifdef __REparse_debug__
        cout << "% right parenthesis: " << pos << endl;
#endif
        current->mark.tag->right = pos;
        return result;
    case MarkFlagBoth:
        if(result)
        {
            packet.count = getUserCount(current);
            packet.left = current->mark.tag->left;
            packet.right = current->mark.tag->right;
            packet.style = (packet.count >= 0) ? Parser::Packet::CountLoop :
                    (flag == MarkFlagBoth)? Parser::Packet::NoncountLoop :
                    Parser::Packet::NoncountLoopVisited;
#ifdef __REparse_debug__
            cout << "% to push parentheses: " << packet.left << ", " <<
                    packet.right << ": " << packet.count << endl;
#endif
        }
#ifdef __REparse_debug__
        cout << "% new parentheses: " << pos << endl;
#endif
        current->mark.tag->left = -1;
        current->mark.tag->right = pos;
        setBasicFlag(current, MarkFlagBothVisited);
        return result;
    case MarkFlagBothVisited:
        if(result)
        {
            packet.count = getUserCount(current);
            packet.left = current->mark.tag->left;
            packet.right = current->mark.tag->right;
            packet.style = (packet.count >= 0) ? Parser::Packet::CountLoop :
                    Parser::Packet::NoncountLoopVisited;
#ifdef __REparse_debug__

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -