⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regexp.cpp

📁 一个完整的编辑器的代码(很值得参考)
💻 CPP
📖 第 1 页 / 共 3 页
字号:
///////////////////////////////////////////////////////////////////////////
//  File:    regexp.cpp
//  Version: 1.1.0.4
//  Updated: 19-Jul-1998
//
//  Copyright:  Marko Macek
//  E-mail:     Marko.Macek@gmx.net or mark@hermes.si
//
//  Some handy stuff to deal with regular expressions
//
//  You are free to use or modify this code to the following restrictions:
//  - Acknowledge me somewhere in your about box, simple "Parts of code by.."
//  will be enough. If you can't (or don't want to), contact me personally.
//  - LEAVE THIS HEADER INTACT
////////////////////////////////////////////////////////////////////////////

#include "stdafx.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "regexp.h"

// Uncomment to compile in the RxDump debugging helper declared below.
//#define DEBUG

// NOTE(review): not referenced in the visible part of this file; presumably a
// counter used while compiling a pattern -- confirm against the rest of the file.
static int RegCount = 0;

#ifdef DEBUG
// Debug-only dump routine; its definition is not visible in this part of the file.
static void RxDump(int N, RxNode *n);
#endif

// Allocates one RxNode, clears every byte of it and stores aWhat (narrowed
// to short) in fWhat.
// Returns the new node, or 0 when malloc fails.
static RxNode *NewNode(int aWhat) {
    RxNode *Node = (RxNode *) malloc(sizeof(RxNode));

    if (Node == 0)
        return 0;

    memset(Node, 0, sizeof(RxNode));
    Node->fWhat = (short) aWhat;
    return Node;
}

// Creates an RE_CHAR node that holds the single character Ch.
// The node owns a freshly allocated one-character buffer (fChar) and has
// fLen set to 1.
// Returns the node, or 0 if either allocation fails (nothing is leaked).
static RxNode *NewChar(TCHAR Ch) {
    RxNode *A = NewNode(RE_CHAR);

    if (A == 0)
        return 0;

    // Size the buffer in TCHAR units: a TCHAR is wider than one byte in
    // _UNICODE builds, so a 1-byte allocation would be overrun by the store below.
    A->fChar = (LPTSTR) malloc(sizeof(TCHAR));
    if (A->fChar == 0) {
        free(A);            // NewNode obtained the node from malloc
        return 0;
    }
    A->fChar[0] = Ch;
    A->fLen = 1;
    return A;
}

static RxNode *NewEscape(LPCTSTR *const Regexp) {
    TCHAR Ch = **Regexp;
    ++*Regexp;
    switch (Ch) {
      case 0: return 0;
      case _T('a'): Ch = _T('\a'); break;
      case _T('b'): Ch = _T('\b'); break;
      case _T('f'): Ch = _T('\f'); break;
      case _T('n'): Ch = _T('\n'); break;
      case _T('r'): Ch = _T('\r'); break;
      case _T('t'): Ch = _T('\t'); break;
      case _T('v'): Ch = _T('\v'); break;
      case _T('e'): Ch = 27; break;
      case _T('s'): return NewNode(RE_WSPACE);
      case _T('S'): return NewNode(RE_NWSPACE);
      case _T('U'): return NewNode(RE_UPPER);
      case _T('L'): return NewNode(RE_LOWER);
      case _T('w'): return NewNode(RE_WORD);
      case _T('W'): return NewNode(RE_NWORD);
      case _T('d'): return NewNode(RE_DIGIT);
      case _T('D'): return NewNode(RE_NDIGIT);
      case _T('C'): return NewNode(RE_CASE);
      case _T('c'): return NewNode(RE_NCASE);
      case _T('N'):
        {
            unsigned int N = 0;
            unsigned int A = 0;
            if (**Regexp == 0) return 0;
            N = _totupper(**Regexp) - 48; if (N > 9) return 0;
            (*Regexp)++;
            A = N * 100;
            if (**Regexp == 0) return 0;
            N = _totupper(**Regexp) - 48; if (N > 9) return 0;
            (*Regexp)++;
            A = A + N * 10;
            if (**Regexp == 0) return 0;
            N = _totupper(**Regexp) - 48; if (N > 9) return 0;
            (*Regexp)++;
            A = A + N;
            Ch = (TCHAR) A;
        }
        break;
    case _T('o'):
        {
            unsigned int N = 0;
            unsigned int A = 0;
            if (**Regexp == 0) return 0;
            N = _totupper(**Regexp) - 48; if (N > 7) return 0;
            (*Regexp)++;
            A = N * 64;
            if (**Regexp == 0) return 0;
            N = _totupper(**Regexp) - 48; if (N > 7) return 0;
            (*Regexp)++;
            A = A + N * 8;
            if (**Regexp == 0) return 0;
            N = _totupper(**Regexp) - 48; if (N > 7) return 0;
            (*Regexp)++;
            A = A + N;
            Ch = (TCHAR) A;
        }
        break;
    case _T('x'):
        {
            unsigned int N = 0;
            unsigned int A = 0;
            if (**Regexp == 0) return 0;
            N = _totupper(**Regexp) - 48; if (N > 9) N = N + 48 - 65 + 10; if (N > 15) return 0;
            (*Regexp)++;
            A = N << 4;
            if (**Regexp == 0) return 0;
            N = _totupper(**Regexp) - 48; if (N > 9) N = N + 48 - 65 + 10; if (N > 15) return 0;
            (*Regexp)++;
            A = A + N;
            Ch = (TCHAR) A;
        }
        break;
    }
    return NewChar(Ch);
}


#define NNN 32        // 8 * 32 = 256 (match set)

// SETOP(set, n): mark character code n as a member of the match set `set`,
// an array of NNN cells with 8 membership bits used per cell.
// Note that n is evaluated more than once; pass a side-effect-free expression.
#ifdef _UNICODE
// TCHAR can carry codes beyond the 256 the set has room for. Those codes are
// ignored here instead of being written past the end of the array.
#define SETOP(set,n) \
    do { \
      if ((unsigned int)(TCHAR)(n) < NNN * 8) \
        (set)[(TCHAR)(n) >> 3] |= (TCHAR)(1 << ((TCHAR)(n) & 7)); \
    } while (0)
#else // _UNICODE
#define SETOP(set,n) \
    do { \
      set[(unsigned char)(n) >> 3] |= (unsigned char)(1 << ((unsigned char)(n) & 7)); \
    } while (0)
#endif // _UNICODE

static RxNode *NewSet(LPCTSTR * const Regexp) {
#ifdef _UNICODE
    TCHAR set[NNN];
#else // _UNICODE
    unsigned char set[NNN];
#endif // _UNICODE
    int s = 0;
    int c = 0;
    unsigned int i, xx;
#ifdef _UNICODE
    TCHAR Ch, C1 = 0, C2 = 0;
#else // _UNICODE
    unsigned char Ch, C1 = 0, C2 = 0;
#endif // _UNICODE
    int doset = 0;

    memset(set, 0, sizeof(set));
    s = 1;
    if (**Regexp == _T('^')) {
        s = 0;
        ++*Regexp;
    }
    c = 0;

    while (**Regexp) {
        switch (Ch = *((*Regexp)++)) {
          case _T(']'):
            if (doset == 1) return 0;
            {
                RxNode *N = NewNode(s?RE_INSET:RE_NOTINSET);
                N->fChar = (LPTSTR) malloc(sizeof(set));
                N->fLen = sizeof(set);
                if (N->fChar == 0) return 0;
                memcpy(N->fChar, (LPTSTR) set, sizeof(set));
                return N;
            }
          case _T('\\'):
            switch (Ch = *((*Regexp)++)) {
              case 0: return 0;
              case _T('a'): Ch = _T('\a'); break;
              case _T('b'): Ch = _T('\b'); break;
              case _T('f'): Ch = _T('\f'); break;
              case _T('n'): Ch = _T('\n'); break;
              case _T('r'): Ch = _T('\r'); break;
              case _T('t'): Ch = _T('\t'); break;
              case _T('v'): Ch = _T('\v'); break;
              case _T('e'): Ch = 27; break;
              case _T('N'):
                  {
                      unsigned int N = 0;
                      unsigned int A = 0;
                      if (**Regexp == 0) return 0;
                      N = _totupper(**Regexp) - 48; if (N > 9) return 0;
                      (*Regexp)++;
                      A = N * 100;
                      if (**Regexp == 0) return 0;
                      N = _totupper(**Regexp) - 48; if (N > 9) return 0;
                      (*Regexp)++;
                      A = A + N * 10;
                      if (**Regexp == 0) return 0;
                      N = _totupper(**Regexp) - 48; if (N > 9) return 0;
                      (*Regexp)++;
                      A = A + N;
#ifdef _UNICODE
                      Ch = (TCHAR)A;
#else // _UNICODE
                      Ch = (unsigned char)A;
#endif // _UNICODE
                  }
                  break;
            case _T('o'):
                {
                    unsigned int N = 0;
                    unsigned int A = 0;
                    if (**Regexp == 0) return 0;
                    N = _totupper(**Regexp) - 48; if (N > 7) return 0;
                    (*Regexp)++;
                    A = N * 64;
                    if (**Regexp == 0) return 0;
                    N = _totupper(**Regexp) - 48; if (N > 7) return 0;
                    (*Regexp)++;
                    A = A + N * 8;
                    if (**Regexp == 0) return 0;
                    N = _totupper(**Regexp) - 48; if (N > 7) return 0;
                    (*Regexp)++;
                    A = A + N;
#ifdef _UNICODE
                    Ch = (TCHAR)A;
#else // _UNICODE
                    Ch = (unsigned char)A;
#endif // _UNICODE
                }
                break;
            case _T('x'):
                {
                    unsigned int N = 0;
                    unsigned int A = 0;
                    if (**Regexp == 0) return 0;
                    N = _totupper(**Regexp) - 48; if (N > 9) N = N + 48 - 65 + 10; if (N > 15) return 0;
                    (*Regexp)++;
                    A = N << 4;
                    if (**Regexp == 0) return 0;
                    N = _totupper(**Regexp) - 48; if (N > 9) N = N + 48 - 65 + 10; if (N > 15) return 0;
                    (*Regexp)++;
                    A = A + N;
#ifdef _UNICODE
                    Ch = (TCHAR)A;
#else // _UNICODE
                    Ch = (unsigned char)A;
#endif // _UNICODE
                }
                break;
            case _T('s'):
                c += 4;
                SETOP(set, _T('\n'));
                SETOP(set, _T('\t'));
                SETOP(set, _T(' '));
                SETOP(set, _T('\r'));
                continue;
            case _T('S'):
                for (xx = 0; xx <= 255; xx++) {
                    if (xx != _T(' ') && xx != _T('\t') && xx != _T('\n') && xx != _T('\r')) {
                        c++;
                        SETOP(set, xx);
                    }
                }
                continue;
            case _T('w'):
                for (xx = 0; xx <= 255; xx++) {
                    if (_istalnum(xx)) {
                        c++;
                        SETOP(set, xx);
                    }
                }
                break;
            case _T('W'):
                for (xx = 0; xx <= 255; xx++) {
                    if (!isalnum(xx)) {
                        c++;
                        SETOP(set, xx);
                    }
                }
                break;
            case _T('d'):
                for (xx = 0; xx <= 255; xx++) {
                    if (_istdigit(xx)) {
                        c++;
                        SETOP(set, xx);
                    }
                }
                break;
            case _T('D'):
                for (xx = 0; xx <= 255; xx++) {
                    if (!_istdigit(xx)) {
                        c++;
                        SETOP(set, xx);
                    }
                }
                break;
            case _T('U'):
                for (xx = _T('A'); xx <= _T('Z'); xx++) {
                    c++;
                    SETOP(set, xx);
                }
                continue;
            case _T('L'):
                for (xx = _T('a'); xx <= _T('z'); xx++) {
                    c++;
                    SETOP(set, xx);
                }
                continue;
            }
            break;
        }
        if (doset == 0 && ((**Regexp) == _T('-'))) {
            doset = 1;
            C1 = Ch;
            ++*Regexp;
            continue;
        } else if (doset == 1) {
            C2 = Ch;
            if (C2 < C1) return 0;
            for(i = C1; i <= C2; i++) SETOP(set, i);
            doset = 0;
            continue;
        }
        c++;
        SETOP(set, Ch);
    }
    return 0;
}

// Appends node A to the doubly linked list described by *F (first node) and
// *N (last node).
// With an empty list (*F is 0) A becomes both first and last and its links
// are cleared; otherwise A is linked after *N and *N is moved to A.
// Returns 1 when A was added, 0 when A is 0 (the list is left untouched).
static int AddNode(RxNode **F, RxNode **N, RxNode *A) {
    if (!A)
        return 0;

    if (*F == 0) {
        *F = A;
        *N = A;
        A->fNext = 0;
        A->fPrev = 0;
        return 1;
    }

    (*N)->fNext = A;
    A->fPrev = *N;
    *N = A;
    return 1;
}

// Walks the list starting at N through fNext and counts the nodes whose
// fWhat lies in the range 32..63; every other node contributes nothing.
// NOTE(review): presumably 32..63 are the node kinds that match exactly one
// character -- confirm against the RE_* values in regexp.h.
static int CountWidth(RxNode *N) {
    int Width = 0;
    RxNode *Cur;

    for (Cur = N; Cur != 0; Cur = Cur->fNext) {
        if (Cur->fWhat >= 32 && Cur->fWhat < 64)
            ++Width;
    }
    return Width;
}

static int MakeSub(RxNode **F, RxNode **N, TCHAR What) {
    //_tprintf(_T("MakeSub: %c\n", What));
    if (*N) {
        RxNode *No;
        RxNode *New;
        RxNode *Jump, *Skip;
        RxNode *Last = (*N);

        if (Last->fWhat & RE_GROUP) {
            RxNode *P = Last->fPrev;
            int C = 1;

            while ((C > 0) && P) {
                //puts(_T("backtracking...-----"));
                //RxDump(0, P);
                if (P->fWhat & RE_GROUP) {
                    if (P->fWhat & RE_CLOSE) C++;
                    else C--;
                }
                Last = P;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -