⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regexp.cpp

📁 一个完整的编辑器的代码(很值得参考)
💻 CPP
📖 第 1 页 / 共 3 页
字号:
                    if (RxMatch(n->fNext) == 1) return 1;
                    flags = fl;
                    rex = save;
                    n = n->fPtr;
                    continue;
                }
            }
            break;
        }
        n = n->fNext;
    }
    /* NOTREACHED */
    assert(1 == 0 /* internal regexp error */);
    return 0;
}

/*
 * Run one match attempt of the compiled node list rx with the global
 * cursor `rex` positioned at s.
 *
 * Every Open/Close slot of the global `match` result is reset to -1 before
 * the attempt. On success slot 0 receives the offsets (relative to `bop`)
 * of s and of the cursor position left behind by RxMatch, and 1 is
 * returned. On failure the global `flags` value that was current on entry
 * is put back and 0 is returned.
 */
int RxTry(RxNode *rx, LPCTSTR s) {
    const int savedFlags = flags;

    rex = s;

    for (int slot = 0; slot < NSEXPS; slot++) {
        match->Close[slot] = -1;
        match->Open[slot] = -1;
    }

    if (!RxMatch(rx)) {
        // Failed attempt: undo any change RxMatch made to the option flags.
        flags = savedFlags;
        return 0;
    }

    match->Open[0] = (int) (s - bop);
    match->Close[0] = (int) (rex - bop);
    return 1;
}

/*
 * Search the buffer Data[0 .. Len) for the compiled expression Regexp,
 * trying successive start positions from Start onwards.
 *
 *   Regexp  compiled expression; a null pointer yields 0 immediately
 *           (Match is left untouched in that case).
 *   Data    start of the buffer; match offsets are reported relative to it.
 *   Len     number of characters in the buffer.
 *   Start   first position at which a match is attempted.
 *   Match   receives the result; all Open/Close slots are reset to -1 first.
 *   RxOpt   option bits copied into the global `flags`. When RX_CASE is set
 *           the first-character prefilter compares characters exactly,
 *           otherwise it compares them after _totupper().
 *
 * Returns 1 when some attempt succeeds, 0 otherwise.
 *
 * The function stores its arguments in the file-level variables match, bop,
 * eop and flags, which RxTry reads.
 */
int RxExec(RxNode *Regexp, LPCTSTR Data, int Len, LPCTSTR Start, RxMatchRes *Match, unsigned int RxOpt) {
    TCHAR Ch;
    if (Regexp == 0) return 0;

    match = Match;
    bop = Data;
    eop = Data + Len;

    flags = RxOpt;

    for (int i = 0; i < NSEXPS; i++) Match->Open[i] = Match->Close[i] = -1;

    switch (Regexp->fWhat) { // this should be more clever
    case RE_ATBOL:     // match is anchored: a single attempt at Start
        return RxTry(Regexp, Start);
    case RE_CHAR:    // search for a character to match
        // The expression starts with a literal character, so only positions
        // holding that character are worth a full RxTry.
        Ch = Regexp->fChar[0];
        if (Start == eop)
            break;
        if (flags & RX_CASE) {
            while (1) {
                while (Start < eop && *Start != Ch)
                    Start++;
                if (Start == eop)
                    break;
                if (RxTry(Regexp, Start))
                    return 1;
                if (++Start == eop)
                    break;
            }
        } else {
            Ch = _totupper(Ch);
            while (1) {
                while (Start < eop && (TCHAR) _totupper(*Start) != Ch)
                    Start++;
                if (Start == eop)
                    break;
                if (RxTry(Regexp, Start))
                    return 1;
                if (++Start == eop)
                    break;
            }
        }
        break;
    default:         // (slow)
        // Try every position up to and including eop (the position at eop
        // is still attempted, as before). The pointer is only advanced
        // while it is below eop, so it never moves past one-past-the-end
        // of the buffer.
        for (;;) {
            if (RxTry(Regexp, Start)) return 1;
            if (Start >= eop) break;
            Start++;
        }
        break;
    }
    return 0;
}

/*
 * Case-conversion state bits used by RxReplace() and applied by add().
 * RxReplace sets them from replacement escapes: \U -> FLAG_UP_CASE,
 * \L -> FLAG_DOWN_CASE, \u -> FLAG_UP_NEXT, \l -> FLAG_DOWN_NEXT;
 * \E or \e clears the two *_CASE bits.
 */
#define FLAG_UP_CASE     1   /* add() upper-cases every character it appends */
#define FLAG_DOWN_CASE   2   /* add() lower-cases every character it appends */
#define FLAG_UP_NEXT     4   /* add() upper-cases the first appended character, then clears the bit */
#define FLAG_DOWN_NEXT   8   /* add() lower-cases the first appended character, then clears the bit */

static int add(int *len, LPTSTR *s, LPCTSTR a, int alen, int &flag) {
    int NewLen = *len + alen;
    int i;

    NewLen = NewLen * 2;

    if (alen == 0)
        return 0;

    if (*s) {
        *s = (LPTSTR) realloc(*s, NewLen);
        assert(*s);
        memcpy(*s + *len, a, alen);
    } else {
        *s = (LPTSTR) malloc(NewLen);
        assert(*s);
        memcpy(*s, a, alen);
        *len = 0;
    }
    if (flag & FLAG_UP_CASE) {
        LPTSTR p = *s + *len;

        for (i = 0; i < alen; i++) {
            *p = (TCHAR)_totupper(*p);
            p++;
        }
    } else if (flag & FLAG_DOWN_CASE) {
        LPTSTR p = *s + *len;

        for (i = 0; i < alen; i++) {
            *p = (TCHAR)_totlower(*p);
            p++;
        }
    }
    if (flag & FLAG_UP_NEXT) {
        LPTSTR p = *s + *len;

        *p = (TCHAR)_totupper(*p);
        flag &= ~FLAG_UP_NEXT;
    } else if (flag & FLAG_DOWN_NEXT) {
        LPTSTR p = *s + *len;

        *p = (TCHAR)_totlower(*p);
        flag &= ~FLAG_DOWN_NEXT;
    }
    *len += alen;
    return 0;
}

int RxReplace(LPCTSTR rep, LPCTSTR Src, int /*len*/, RxMatchRes match, LPTSTR *Dest, int *Dlen) {
    int dlen = 0;
    LPTSTR dest = 0;
    TCHAR Ch;
    int n;
    int flag = 0;

    *Dest = 0;
    *Dlen = 0;
    //    add(&dlen, &dest, Src, match.Open[0]);
    while (*rep) {
        switch (Ch = *rep++) {
            //        case _T('&'):
            //            add(&dlen, &dest, Src + match.Open[0], match.Close[0] - match.Open[0], flag);
            //            break;
        case _T('\\'):
            switch (Ch = *rep++) {
            case _T('0'):
            case _T('1'): case _T('2'): case _T('3'):
            case _T('4'): case _T('5'): case _T('6'):
            case _T('7'): case _T('8'): case _T('9'):
                n = Ch - 48;

                if (match.Open[n] != -1 && match.Close[n] != -1) {
                    add(&dlen, &dest, Src + match.Open[n], match.Close[n] - match.Open[n], flag);
                } else return -1;
                break;
            case 0:
                if (dest) free(dest);
                return -1; // error
            case _T('r'): Ch = _T('\r'); add(&dlen, &dest, &Ch, 1, flag); break;
            case _T('n'): Ch = _T('\n'); add(&dlen, &dest, &Ch, 1, flag); break;
            case _T('b'): Ch = _T('\b'); add(&dlen, &dest, &Ch, 1, flag); break;
            case _T('a'): Ch = _T('\a'); add(&dlen, &dest, &Ch, 1, flag); break;
            case _T('t'): Ch = _T('\t'); add(&dlen, &dest, &Ch, 1, flag); break;
            case _T('U'): flag |= FLAG_UP_CASE; break;
            case _T('u'): flag |= FLAG_UP_NEXT; break;
            case _T('L'): flag |= FLAG_DOWN_CASE; break;
            case _T('l'): flag |= FLAG_DOWN_NEXT; break;
            case _T('E'):
            case _T('e'): flag &= ~(FLAG_UP_CASE | FLAG_DOWN_CASE); break;
            case _T('x'):
                {
                    int N = 0;
                    int A = 0;

                    if (*rep == 0) return 0;
                    N = _totupper(*rep) - 48; if (N > 9) N = N + 48 - 65 + 10; if (N > 15) return 0;
                    rep++;
                    A = N << 4;
                    if (*rep == 0) return 0;
                    N = _totupper(*rep) - 48; if (N > 9) N = N + 48 - 65 + 10; if (N > 15) return 0;
                    rep++;
                    A = A + N;
                    Ch = (TCHAR)A;
                }
                add(&dlen, &dest, &Ch, 1, flag);
                break;
            case _T('d'):
                {
                    int N = 0;
                    int A = 0;

                    if (*rep == 0) return 0;
                    N = _totupper(*rep) - 48; if (N > 9) return 0;
                    rep++;
                    A = N * 100;
                    if (*rep == 0) return 0;
                    N = _totupper(*rep) - 48; if (N > 9) return 0;
                    rep++;
                    A = N * 10;
                    if (*rep == 0) return 0;
                    N = _totupper(*rep) - 48; if (N > 9) return 0;
                    rep++;
                    A = A + N;
                    Ch = (TCHAR)A;
                }
                add(&dlen, &dest, &Ch, 1, flag);
                break;
            case _T('o'):
                {
                    int N = 0;
                    int A = 0;

                    if (*rep == 0) return 0;
                    N = _totupper(*rep) - 48; if (N > 7) return 0;
                    rep++;
                    A = N * 64;
                    if (*rep == 0) return 0;
                    N = _totupper(*rep) - 48; if (N > 7) return 0;
                    rep++;
                    A = N * 8;
                    if (*rep == 0) return 0;
                    N = _totupper(*rep) - 48; if (N > 7) return 0;
                    rep++;
                    A = A + N;
                    Ch = (TCHAR)A;
                }
                add(&dlen, &dest, &Ch, 1, flag);
                break;
            default:
                add(&dlen, &dest, &Ch, 1, flag);
                break;
            }
            break;
        default:
            add(&dlen, &dest, &Ch, 1, flag);
            break;
        }
    }
    //    add(&dlen, &dest, Src + match.Close[0], len - match.Close[0]);
    *Dlen = dlen;
    *Dest = dest;
    return 0;
}

#if 0

static void RxDump(int N, RxNode *n) {
    while (n) {
        for (int i = 0; i < N; i++) _tprintf(_T("    "));
        switch (n->fWhat) {
        case RE_NOTHING:   _tprintf(_T("NOTHING\n")); break;
        case RE_CHAR:      _tprintf(_T("CHAR '%.1s'\n"), n->fChar); break;
        case RE_ATBOL:     _tprintf(_T("^\n")); break;
        case RE_ATEOL:     _tprintf(_T("$\n")); break;
        case RE_ANY:       _tprintf(_T(".\n")); break;
        case RE_INSET:     _tprintf(_T("[\n")/*, n->fChar*/); break;
        case RE_NOTINSET:  _tprintf(_T("[^\n")/*, n->fChar*/); break;
        case RE_ATBOW:     _tprintf(_T("<\n")); break;
        case RE_ATEOW:     _tprintf(_T(">\n")); break;
        case RE_WSPACE:    _tprintf(_T("WSPACE\n")); break;
        case RE_NWSPACE:   _tprintf(_T("NWSPACE\n")); break;
        case RE_UPPER:     _tprintf(_T("UPPER\n")); break;
        case RE_LOWER:     _tprintf(_T("LOWER\n")); break;
        case RE_JUMP:      _tprintf(_T("JUMP\n")); break;
        case RE_BREAK:     _tprintf(_T("BREAK\n")); break;
        case RE_END:       _tprintf(_T("END\n")); break;
        default:
            if (n->fWhat & RE_GROUP) {
                if (n->fWhat & RE_MEM) {
                    if (n->fWhat & RE_OPEN)  _tprintf(_T("(  %d\n"), n->fWhat & 0xFF);
                    if (n->fWhat & RE_CLOSE) _tprintf(_T(")  %d\n"), n->fWhat & 0xFF);
                } else {
                    if (n->fWhat & RE_OPEN)  _tprintf(_T("{\n"));
                    if (n->fWhat & RE_CLOSE) _tprintf(_T("}\n"));
                }
            } else if (n->fWhat & RE_BRANCH) {
                if (n->fWhat & RE_GREEDY) {
                    _tprintf(_T("%c\n"), n->fWhat & 0xFF);
                } else {
                    _tprintf(_T("%c\n"), n->fWhat & 0xFF);
                }
            } else {
                _tprintf(_T("???????????????\n"));
            }
            break;
        }
        n = n->fNext;
    }
}

/*
 * Self-test step: compile rx, dump the node list, run it against st and
 * assert that RxExec returns rc.
 *
 * Uses the variables `a`, `b` and `line` of the enclosing function.
 * RxCompile and RxExec are called outside assert() so they still run when
 * assertions are compiled out, and the body is wrapped in do { } while (0)
 * so that `TEST(...);` is a single statement.
 */
#define TEST(rc,rx,st) \
    do { \
        int rxTestResult; \
        _tcscpy(line,st); \
        a = RxCompile(rx); \
        assert(a != 0); \
        puts(_T("\n--- " rx " -- " st " -- ")); \
        RxDump(0,a);\
        rxTestResult = RxExec(a, line, _tcslen(line), line, &b); \
        assert((rc) == rxTestResult); \
        (void) rxTestResult; \
        RxFree(a); \
    } while (0)

int main() {
    RxNode *a;
    RxMatchRes b;
    TCHAR line[1024];

    TEST(1, _T("a"), _T("a"));
    TEST(0, _T("b"), _T("a"));
    TEST(1, _T("aaaa"), _T("aaaa"));
    TEST(0, _T("bbbb"), _T("aaaa"));
    TEST(1, _T("."), _T("a"));
    TEST(0, _T("."), _T(""));
    TEST(1, _T("a.."), _T("axx"));
    TEST(0, _T("a.."), _T("b.."));
    TEST(1, _T("a?b"), _T("ab"));
    TEST(1, _T("a?b"), _T("xb"));
    TEST(0, _T("a?C"), _T("xb"));
    TEST(1, _T("{aa}?b"), _T("aab"));
    TEST(1, _T("{aa}?b"), _T("xab"));
    TEST(0, _T("{aa}?C"), _T("xxb"));
    TEST(1, _T("^aa"), _T("aa"));
    TEST(0, _T("^aa"), _T("baa"));
    TEST(1, _T("^aa$"),_T("aa"));
    TEST(0, _T("^aa$"), _T("baab"));
    TEST(1, _T("a*b"), _T("aaab"));
    TEST(0, _T("a*b"), _T("aaaa"));
    TEST(1, _T("{aa}*b"), _T("aaab"));
    TEST(0, _T("{aa}*b"), _T("aaaa"));
    TEST(1, _T("b+"), _T("bb"));
    TEST(1, _T("b+"), _T("b"));
    TEST(0, _T("b+"), _T("a"));
    TEST(1, _T("^b+$"), _T("b"));
    TEST(0, _T("^b+$"), _T("aba"));
    TEST(1, _T("a|b"), _T(" a"));
    TEST(1, _T("a|b"), _T(" b"));
    TEST(0, _T("a|b"), _T(" c"));
    TEST(1, _T("a|b|c|d|e"), _T(" a "));
    TEST(1, _T("a|b|c|d|e"), _T(" c "));
    TEST(1, _T("a|b|c|d|e"), _T(" e "));
    TEST(0, _T("a|b|c|d|e"), _T(" x "));
    TEST(1, _T("{a}|{b}|{c}|{d}|{e}"), _T(" a "));
    TEST(1, _T("{a}|{b}|{c}|{d}|{e}"), _T(" c "));
    TEST(1, _T("{a}|{b}|{c}|{d}|{e}"), _T(" e "));
    TEST(0, _T("{a}|{b}|{c}|{d}|{e}"), _T(" x "));
    TEST(1, _T("^xx{alpha}|{beta}xx$"), _T("xxalphaxx"));
    TEST(1, _T("^xx{alpha}|{beta}xx$"), _T("xxbetaxx"));
    TEST(1, _T("[a-z]"), _T("aaa"));
    TEST(1, _T("^{Error}|{Warning}"), _T("Warning search.cpp 35: Conversion may lose significant digits in function AskReplace()"));
    TEST(1, _T("^{Error}|{Warning} (.+)"), _T("Warning search.cpp 35: Conversion may lose significant digits in function AskReplace()"));
    TEST(1, _T("^{Error}|{Warning} ([a-z.]#) ([0-9]#)"), _T("Warning search.cpp 35: Conversion may lose significant digits in function AskReplace()"));
    TEST(1, _T("^{Error}|{Warning} (.+) ([0-9]+): (.*)$"), _T("Warning search.cpp 35: Conversion may lose significant digits in function AskReplace()"));
    TEST(1, _T("^{Error}|{Warning} (.+) ([0-9]+): (.*)$"), _T("Error search.cpp 35: Conversion may lose significant digits in function AskReplace()"));
    TEST(1, _T("^([a-z]+ +)*\\("), _T("blabla bla bla bla ("));
    TEST(1, _T("^([a-z]+\\s+)+\\("), _T("blabla bla bla bla ("));
    TEST(1, _T("^([a-z]+\\s*)+\\("), _T("blabla bla bla bla("));
    TEST(1, _T("^([a-z]+\\s+)+\\("), _T("blabla bla   bla bla ("));
    TEST(1, _T("^([a-z]+\\s*)+\\("), _T("blabla   bla bla bla("));
    TEST(1, _T("^([a-z]# #)*\\("), _T("blabla bla bla bla ("));
    TEST(1, _T("^([a-z]+ @)@\\("), _T("blabla bla bla bla ("));
    TEST(1, _T("^[\\x20-\\xFF]+$"), _T("blabla"));
    TEST(1, _T("{a{a{a{a|a}|{a|a}a}a}a|a}"), _T("aaaaaaaaaaaaaaaaa"));

    while (1) {
        _tprintf(_T ("Regexp: ")); fflush(stdout); gets(line);
        if (!*line) break;
        a = RxCompile(line); RxDump(0, a);
        _tprintf(_T ("String: ")); fflush(stdout); gets(line);
        _tprintf(_T ("rc = %d\n"), RxExec(a, line, _tcslen(line), line, &b));
        for (int i = 0; i < NSEXPS; i++) {
            if (b.Open[i] != -1) {
                _tprintf(_T ("%d: %d %d\n"), i, b.Open[i], b.Close[i]);
            }
        }
        RxFree(a);
    }
    return 0;
}

#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -