⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regexp.cpp

📁 一个完整的编辑器的代码(很值得参考)
💻 CPP
📖 第 1 页 / 共 3 页
字号:
                if (C == 0) break;
                P = P->fPrev;
            }
            //_tprintf(_T("P = %s, c = %d", P ? "ok":"null", C));
            if (C != 0) return 0;
        }
        assert(Last);
        if (What != _T('?') && What != _T('|'))
            if (CountWidth(Last) == 0) {
                //                puts(_T("FAILED count"));
                return 0;
            }
        switch (What) {
        case _T('?'):    /* BRANCH x NOTHING */
            New = NewNode(RE_BRANCH | RE_GREEDY | What);
            No = NewNode(RE_NOTHING);
            if (!New || !No) return 0;
            No->fPrev = *N;
            if (*N)
                (*N)->fNext = No;
            New->fNext = Last;
            New->fPrev = Last->fPrev;
            Last->fPrev = New;
            if (New->fPrev) {
                New->fPrev->fNext = New;
            } else {
                *F = New;
            }
            New->fPtr = No;
            No->fPtr = New;
            *N = No;
            //puts(_T("BRANCH ?"));
            break;

        case _T('*'):
        case _T('@'):
            New = NewNode(RE_BRANCH | What | ((What == _T('*')) ? RE_GREEDY : 0));
            Jump = NewNode(RE_JUMP);
            No = NewNode(RE_NOTHING);

            if (!New || !No || !Jump) return 0;
            No->fPrev = Jump;
            Jump->fNext = No;
            Jump->fPrev = *N;
            if (*N)
                (*N)->fNext = Jump;
            New->fNext = Last;
            New->fPrev = Last->fPrev;
            Last->fPrev = New;
            if (New->fPrev) {
                New->fPrev->fNext = New;
            } else {
                *F = New;
            }
            New->fPtr = No;
            No->fPtr = New;
            Jump->fPtr = New;
            *N = No;
            //puts(_T("BRANCH *"));
            break;

        case _T('#'):
        case _T('+'):
            New = NewNode(RE_BRANCH | What | ((What == _T('+')) ? RE_GREEDY : 0));
            Skip = NewNode(RE_JUMP);
            Jump = NewNode(RE_JUMP);
            No = NewNode(RE_NOTHING);

            if (!New || !No || !Jump) return 0;
            No->fPrev = Jump;
            Jump->fPrev = *N;
            Jump->fNext = No;

            Skip->fNext = New;
            New->fPrev = Skip;
            if (*N)
                (*N)->fNext = Jump;
            New->fNext = Last;
            Skip->fPrev = Last->fPrev;
            Last->fPrev = New;
            if (Skip->fPrev) {
                Skip->fPrev->fNext = Skip;
            } else {
                *F = Skip;
            }
            New->fPtr = No;
            No->fPtr = New;
            Jump->fPtr = New;
            Skip->fPtr = Last;
            *N = No;
            //puts(_T("BRANCH +"));
            break;
        case _T('|'):
            New = NewNode(RE_BRANCH | RE_GREEDY | What);
            Jump = NewNode(RE_BREAK);
            No = NewNode(RE_NOTHING);

            if (!New || !No || !Jump) return 0;
            No->fPrev = Jump;
            Jump->fNext = No;
            Jump->fPrev = *N;
            if (*N)
                (*N)->fNext = Jump;
            New->fNext = Last;
            New->fPrev = Last->fPrev;
            Last->fPrev = New;
            if (New->fPrev) {
                New->fPrev->fNext = New;
            } else {
                *F = New;
            }
            New->fPtr = No;
            No->fPtr = New;
            Jump->fPtr = New;
            *N = No;
            //puts(_T("BRANCH |"));
            break;
        }
        return 1;
    }
    return 0;
}

/* Failure propagation helper: if the expression (n) evaluates to 0, make the
 * enclosing function return 0 immediately. Only usable inside functions whose
 * return type accepts 0. Wrapped in do/while(0) so it acts as one statement. */
#define CHECK(n) do { if ((n) == 0) { return 0;} } while (0)

/*
 * Parse the pattern at *Regexp into a chain of nodes linked through fNext.
 *
 * *Regexp is advanced past every character that is consumed. Parsing stops
 * at the end of the string, or right after a ')' or '}' has been consumed;
 * the '(' and '{' cases rely on that when they recurse for the group body.
 *
 * Returns the first node of the chain built so far. Returns 0 as soon as
 * MakeSub() or AddNode() reports failure, and also when nothing was parsed
 * (the head pointer starts out as 0).
 */
static RxNode *RxComp(LPCTSTR *Regexp) {
    RxNode *head = 0;
    RxNode *tail = 0;

    while (**Regexp) {
        TCHAR ch = *(*Regexp)++;
        RxNode *atom = 0;       /* node appended once the switch is done */

        switch (ch) {
        case _T(')'):
        case _T('}'):
            /* Closing delimiter: hand the chain back to whoever opened it. */
            return head;

        case _T('?'):
        case _T('*'):
        case _T('+'):
        case _T('@'):
        case _T('#'):
        case _T('|'):
            /* Operators rework the existing chain instead of appending. */
            if (MakeSub(&head, &tail, ch) == 0)
                return 0;
            continue;

        case _T('('):
        case _T('{'): {
            /* '(' is a remembering group: it takes the next register number
             * and carries it in both its OPEN and CLOSE nodes. The number is
             * fixed before recursing, so nested groups cannot disturb it. */
            int tag = RE_GROUP;
            if (ch == _T('('))
                tag |= RE_MEM | ++RegCount;

            if (AddNode(&head, &tail, NewNode(tag | RE_OPEN)) == 0)
                return 0;
            if (AddNode(&head, &tail, RxComp(Regexp)) == 0)
                return 0;
            /* The sub-chain may be several nodes long; walk to its end. */
            while (tail->fNext)
                tail = tail->fNext;
            atom = NewNode(tag | RE_CLOSE);
            break;
        }

        case _T('\\'):
            atom = NewEscape(Regexp);
            break;
        case _T('['):
            atom = NewSet(Regexp);
            break;
        case _T('^'):
            atom = NewNode(RE_ATBOL);
            break;
        case _T('$'):
            atom = NewNode(RE_ATEOL);
            break;
        case _T('.'):
            atom = NewNode(RE_ANY);
            break;
        case _T('<'):
            atom = NewNode(RE_ATBOW);
            break;
        case _T('>'):
            atom = NewNode(RE_ATEOW);
            break;

        default:
            /* Any other character is a literal. */
            atom = NewChar(ch);
            break;
        }

        if (AddNode(&head, &tail, atom) == 0)
            return 0;
    }
    return head;
}

/*
 * Optimisation hook for a compiled node chain.
 * Currently a no-op: the chain is returned unchanged.
 */
RxNode *RxOptimize(RxNode *rx) {
    return rx;
}

/*
 * Compile a pattern string into a node chain.
 *
 * Regexp: NUL-terminated pattern text; 0 is rejected.
 *
 * Returns the first node of an fNext-linked chain whose last node is an
 * RE_END terminator, or 0 when Regexp is 0, when RxComp() returns 0, or
 * when the terminator node cannot be allocated. The file-level RegCount is
 * reset to 0 before parsing. RxFree() releases a chain of this shape.
 *
 * NOTE(review): RxComp() also returns after consuming an unmatched ')' or
 * '}', so pattern text following one is ignored here rather than rejected.
 */
RxNode *RxCompile(LPCTSTR Regexp) {
    RxNode *n, *x, *end;

    if (Regexp == 0) return 0;

    /* NewNode() can return 0 (its other callers in this file check for it).
     * Obtain the terminator first, so that a failure can never hand back a
     * chain that lacks its RE_END node. */
    end = NewNode(RE_END);
    if (end == 0) return 0;

    RegCount = 0;
    n = RxComp(&Regexp);
    if (n == 0) {
        free(end);              /* nodes are released with free(), as RxFree() does */
        return 0;
    }
    n = RxOptimize(n);

    /* Append the terminator after the last node of the chain. */
    x = n;
    while (x->fNext) x = x->fNext;
    x->fNext = end;
    return n;
}

/*
 * Release every node of the chain starting at n, following fNext.
 * Nodes of kind RE_INSET, RE_NOTINSET and RE_CHAR also have their fChar
 * buffer freed. Passing 0 does nothing.
 */
void RxFree(RxNode *n) {
    for (RxNode *next; n != 0; n = next) {
        /* Read the link before the node itself goes away. */
        next = n->fNext;

        if (n->fWhat == RE_INSET ||
            n->fWhat == RE_NOTINSET ||
            n->fWhat == RE_CHAR) {
            free(n->fChar);
        }
        free(n);
    }
}

/* Evaluates to 1 when x lies in 'A'..'Z', 'a'..'z' or '0'..'9', otherwise 0.
 * Used by the word-boundary tests (RE_ATBOW / RE_ATEOW) in RxMatch.
 * The argument is expanded several times, so it must have no side effects. */
#define ChClass(x) (((((x) >= _T('A')) && ((x) <= _T('Z'))) || (((x) >= _T('a')) && ((x) <= _T('z'))) || (((x) >= _T('0')) && ((x) <= _T('9'))))?1:0)

/* File-level matcher state read and written by RxMatch. */
/* Capture table: RxMatch stores group offsets (relative to bop) in Open[]/Close[], or -1 on failure. */
static RxMatchRes *match;
/* Start of the text being matched; RE_ATBOL requires rex == bop. */
static LPCTSTR bop;
/* End of the text being matched; RE_ATEOL requires rex == eop, and consuming nodes fail there. */
static LPCTSTR eop;
/* Current match flags; RE_CASE sets RX_CASE and RE_NCASE clears it. */
static int flags = RX_CASE;
/* Current position in the text; advanced as nodes consume characters. */
static LPCTSTR rex;

int RxMatch(RxNode *rx) {
    RxNode *n = rx;

    //_tprintf(_T(">>"));
    while (n) {
        //_tprintf(_T("%-50.50s\n", rex));
        //RxDump(1, n);
        switch (n->fWhat) {
        case RE_NOTHING:
            break;
        case RE_CASE:
            flags |= RX_CASE;
            break;
        case RE_NCASE:
            flags &= ~RX_CASE;
            break;
        case RE_ATBOL:
            if (rex != bop) return 0;
            break;
        case RE_ATEOL:
            if (rex != eop) return 0;
            break;
        case RE_ANY:
            if (rex == eop) return 0;
            rex++;
            break;
        case RE_WSPACE:
            if (rex == eop) return 0;
            if (*rex != _T(' ') && *rex != _T('\n') && *rex != _T('\r') && *rex != _T('\t')) return 0;
            rex++;
            break;
        case RE_NWSPACE:
            if (rex == eop) return 0;
            if (*rex == _T(' ') || *rex == _T('\n') || *rex == _T('\r') || *rex == _T('\t')) return 0;
            rex++;
            break;
        case RE_WORD:
            if (rex == eop) return 0;
            if (!_istalnum(*rex)) return 0;
            rex++;
            break;
        case RE_NWORD:
            if (rex == eop) return 0;
            if (_istalnum(*rex)) return 0;
            rex++;
            break;
        case RE_DIGIT:
            if (rex == eop) return 0;
            if (!_istdigit(*rex)) return 0;
            rex++;
            break;
        case RE_NDIGIT:
            if (rex == eop) return 0;
            if (_istdigit(*rex)) return 0;
            rex++;
            break;
        case RE_UPPER:
            if (rex == eop) return 0;
            if (!_istupper(*rex)) return 0;
            rex++;
            break;
        case RE_LOWER:
            if (rex == eop) return 0;
            if (!_istlower(*rex)) return 0;
            rex++;
            break;
        case RE_ATBOW:
            if (rex >= eop) return 0;
            if (rex > bop) {
                if ((ChClass(*rex) != 1) || (ChClass(*(rex-1)) != 0)) return 0;
            }
            break;
        case RE_ATEOW:
            if (rex <= bop) return 0;
            if (rex < eop) {
                if ((ChClass(*rex) != 0) || (ChClass(*(rex-1)) != 1)) return 0;
            }
            break;
        case RE_CHAR:
            if (rex == eop) return 0;
            if (flags & RX_CASE) {
                if (*n->fChar != *rex) return 0;
                if (memcmp(rex, n->fChar, n->fLen) != 0) return 0;
            } else {
                for (int i = 0; i < n->fLen; i++)
                    if (_totupper(rex[i]) != _totupper(n->fChar[i]))
                        return 0;
            }
            rex += n->fLen;
            break;
        case RE_INSET:
            if (rex == eop) return 0;
#ifdef _UNICODE
            if ((n->fChar[(TCHAR)(*rex) >> 3] & (1 << ((TCHAR)(*rex) & 7))) == 0) return 0;
#else // _UNICODE
            if ((n->fChar[(unsigned char)(*rex) >> 3] & (1 << ((unsigned char)(*rex) & 7))) == 0) return 0;
#endif // _UNICODE
            rex++;
            break;
        case RE_NOTINSET:
            if (rex == eop) return 0;
#ifdef _UNICODE
            if (n->fChar[(TCHAR)(*rex) >> 3] & (1 << ((TCHAR)(*rex) & 7))) return 0;
#else // _UNICODE
            if (n->fChar[(unsigned char)(*rex) >> 3] & (1 << ((unsigned char)(*rex) & 7))) return 0;
#endif // _UNICODE
            rex++;
            break;
        case RE_JUMP:
            n = n->fPtr;
            continue;
        case RE_END:
            return 1;
        case RE_BREAK:
            n = n->fNext;
            if (n->fNext == 0) break;
            n = n->fNext;
            if (n->fWhat & RE_BRANCH) {
                while ((n->fWhat & RE_BRANCH) && n->fPtr && ((n->fWhat & 0xFF) == _T('|')))
                    n = n->fPtr->fNext;
            }
            if (n->fWhat & RE_GROUP) {
                int C = 1;
                n = n->fNext;
                while ((C > 0) && n) {
                    if (n->fWhat & RE_GROUP) {
                        if (n->fWhat & RE_OPEN) C++;
                        else C--;
                    }
                    if (C == 0) break;
                    n = n->fNext;
                }
            }
            break;
        default:
            if (n->fWhat & RE_GROUP) {
                if (n->fWhat & RE_MEM) {
                    LPCTSTR save = rex;
                    int b = n->fWhat & 0xFF;
                    int fl = flags;

                    if (RxMatch(n->fNext) == 0) {
                        flags = fl;
                        if (n->fWhat & RE_OPEN)
                            match->Open[b] = -1;
                        else
                            match->Close[b] = -1;
                        return 0;
                    }

                    if (n->fWhat & RE_OPEN) {
                        //                        if (match->Open[b] == -1)
                        match->Open[b] = (int) (save - bop);
                    } else {
                        //                        if (match->Close[b] == -1)
                        match->Close[b] = (int) (save - bop);
                    }
                    return 1;
                }
            } else if (n->fWhat & RE_BRANCH) {
                LPCTSTR save = rex;
                int fl = flags;

                if ((n->fWhat & RE_GREEDY) == 0) {
                    if (RxMatch(n->fPtr) == 1) return 1;
                    flags = fl;
                    rex = save;
                } else {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -