📄 regexp.cpp
字号:
///////////////////////////////////////////////////////////////////////////
// File: regexp.cpp
// Version: 1.1.0.4
// Updated: 19-Jul-1998
//
// Copyright: Marko Macek
// E-mail: Marko.Macek@gmx.net or mark@hermes.si
//
// Some handy stuff to deal with regular expressions
//
// You are free to use or modify this code to the following restrictions:
// - Acknowledge me somewhere in your about box, simple "Parts of code by.."
// will be enough. If you can't (or don't want to), contact me personally.
// - LEAVE THIS HEADER INTACT
////////////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "regexp.h"
//#define DEBUG
// File-local counter, initialised to zero.
// NOTE(review): nothing in this part of the file reads or writes it; its exact
// role (presumably bookkeeping during pattern compilation) must be confirmed
// against the rest of the file.
static int RegCount = 0;
#ifdef DEBUG
// Forward declaration of the debug-only node dump routine; it is compiled in
// only when DEBUG is defined (see the commented-out #define above).
static void RxDump(int N, RxNode *n);
#endif
// Allocates a zero-filled RxNode and tags it with the node kind aWhat
// (stored truncated to short in fWhat).
// Returns the new node, or 0 when the allocation fails.
// The caller owns the returned node.
static RxNode *NewNode(int aWhat) {
    RxNode *Node = (RxNode *) calloc(1, sizeof(RxNode));

    if (Node == 0)
        return 0;
    Node->fWhat = (short)aWhat;
    return Node;
}
// Creates an RE_CHAR node holding the single character Ch.
// The node gets its own heap buffer (fChar) containing exactly one TCHAR and
// fLen is set to 1.
// Returns the new node, or 0 if either allocation fails (nothing is leaked
// in that case).
static RxNode *NewChar(TCHAR Ch) {
    RxNode *A = NewNode(RE_CHAR);

    if (A == 0)
        return 0;

    // The buffer must be sized in TCHARs, not bytes: a one-byte allocation is
    // too small for a wide character in _UNICODE builds.
    A->fChar = (LPTSTR) malloc(sizeof(TCHAR));
    if (A->fChar == 0) {
        free(A);
        return 0;
    }
    A->fLen = 1;
    A->fChar[0] = Ch;
    return A;
}
static RxNode *NewEscape(LPCTSTR *const Regexp) {
TCHAR Ch = **Regexp;
++*Regexp;
switch (Ch) {
case 0: return 0;
case _T('a'): Ch = _T('\a'); break;
case _T('b'): Ch = _T('\b'); break;
case _T('f'): Ch = _T('\f'); break;
case _T('n'): Ch = _T('\n'); break;
case _T('r'): Ch = _T('\r'); break;
case _T('t'): Ch = _T('\t'); break;
case _T('v'): Ch = _T('\v'); break;
case _T('e'): Ch = 27; break;
case _T('s'): return NewNode(RE_WSPACE);
case _T('S'): return NewNode(RE_NWSPACE);
case _T('U'): return NewNode(RE_UPPER);
case _T('L'): return NewNode(RE_LOWER);
case _T('w'): return NewNode(RE_WORD);
case _T('W'): return NewNode(RE_NWORD);
case _T('d'): return NewNode(RE_DIGIT);
case _T('D'): return NewNode(RE_NDIGIT);
case _T('C'): return NewNode(RE_CASE);
case _T('c'): return NewNode(RE_NCASE);
case _T('N'):
{
unsigned int N = 0;
unsigned int A = 0;
if (**Regexp == 0) return 0;
N = _totupper(**Regexp) - 48; if (N > 9) return 0;
(*Regexp)++;
A = N * 100;
if (**Regexp == 0) return 0;
N = _totupper(**Regexp) - 48; if (N > 9) return 0;
(*Regexp)++;
A = A + N * 10;
if (**Regexp == 0) return 0;
N = _totupper(**Regexp) - 48; if (N > 9) return 0;
(*Regexp)++;
A = A + N;
Ch = (TCHAR) A;
}
break;
case _T('o'):
{
unsigned int N = 0;
unsigned int A = 0;
if (**Regexp == 0) return 0;
N = _totupper(**Regexp) - 48; if (N > 7) return 0;
(*Regexp)++;
A = N * 64;
if (**Regexp == 0) return 0;
N = _totupper(**Regexp) - 48; if (N > 7) return 0;
(*Regexp)++;
A = A + N * 8;
if (**Regexp == 0) return 0;
N = _totupper(**Regexp) - 48; if (N > 7) return 0;
(*Regexp)++;
A = A + N;
Ch = (TCHAR) A;
}
break;
case _T('x'):
{
unsigned int N = 0;
unsigned int A = 0;
if (**Regexp == 0) return 0;
N = _totupper(**Regexp) - 48; if (N > 9) N = N + 48 - 65 + 10; if (N > 15) return 0;
(*Regexp)++;
A = N << 4;
if (**Regexp == 0) return 0;
N = _totupper(**Regexp) - 48; if (N > 9) N = N + 48 - 65 + 10; if (N > 15) return 0;
(*Regexp)++;
A = A + N;
Ch = (TCHAR) A;
}
break;
}
return NewChar(Ch);
}
#define NNN 32 // 8 * 32 = 256 (match set)
// SETOP(set, n): marks character code n as a member of the bitmap `set`.
// `set` is an array of NNN elements; code n lives in element n >> 3, bit n & 7,
// so only the low 8 bits of every element are used and codes 0..255 are
// representable.
// Note: n is evaluated more than once - do not pass expressions with side
// effects.
#ifdef _UNICODE
// A wide character can carry a code above 255, whose element index would lie
// outside the NNN-element array. Such codes are ignored instead of being
// written out of bounds.
// NOTE(review): the code that tests membership is outside this excerpt and
// needs the same range check - confirm.
#define SETOP(set,n) \
do { \
    if ((unsigned int)(TCHAR)(n) < NNN * 8) \
        (set)[(TCHAR)(n) >> 3] |= (TCHAR)(1 << ((TCHAR)(n) & 7)); \
} while (0)
#else // _UNICODE
// n is reduced to unsigned char first, so the index is always within 0..31.
#define SETOP(set,n) \
do { \
    (set)[(unsigned char)(n) >> 3] |= (unsigned char)(1 << ((unsigned char)(n) & 7)); \
} while (0)
#endif // _UNICODE
static RxNode *NewSet(LPCTSTR * const Regexp) {
#ifdef _UNICODE
TCHAR set[NNN];
#else // _UNICODE
unsigned char set[NNN];
#endif // _UNICODE
int s = 0;
int c = 0;
unsigned int i, xx;
#ifdef _UNICODE
TCHAR Ch, C1 = 0, C2 = 0;
#else // _UNICODE
unsigned char Ch, C1 = 0, C2 = 0;
#endif // _UNICODE
int doset = 0;
memset(set, 0, sizeof(set));
s = 1;
if (**Regexp == _T('^')) {
s = 0;
++*Regexp;
}
c = 0;
while (**Regexp) {
switch (Ch = *((*Regexp)++)) {
case _T(']'):
if (doset == 1) return 0;
{
RxNode *N = NewNode(s?RE_INSET:RE_NOTINSET);
N->fChar = (LPTSTR) malloc(sizeof(set));
N->fLen = sizeof(set);
if (N->fChar == 0) return 0;
memcpy(N->fChar, (LPTSTR) set, sizeof(set));
return N;
}
case _T('\\'):
switch (Ch = *((*Regexp)++)) {
case 0: return 0;
case _T('a'): Ch = _T('\a'); break;
case _T('b'): Ch = _T('\b'); break;
case _T('f'): Ch = _T('\f'); break;
case _T('n'): Ch = _T('\n'); break;
case _T('r'): Ch = _T('\r'); break;
case _T('t'): Ch = _T('\t'); break;
case _T('v'): Ch = _T('\v'); break;
case _T('e'): Ch = 27; break;
case _T('N'):
{
unsigned int N = 0;
unsigned int A = 0;
if (**Regexp == 0) return 0;
N = _totupper(**Regexp) - 48; if (N > 9) return 0;
(*Regexp)++;
A = N * 100;
if (**Regexp == 0) return 0;
N = _totupper(**Regexp) - 48; if (N > 9) return 0;
(*Regexp)++;
A = A + N * 10;
if (**Regexp == 0) return 0;
N = _totupper(**Regexp) - 48; if (N > 9) return 0;
(*Regexp)++;
A = A + N;
#ifdef _UNICODE
Ch = (TCHAR)A;
#else // _UNICODE
Ch = (unsigned char)A;
#endif // _UNICODE
}
break;
case _T('o'):
{
unsigned int N = 0;
unsigned int A = 0;
if (**Regexp == 0) return 0;
N = _totupper(**Regexp) - 48; if (N > 7) return 0;
(*Regexp)++;
A = N * 64;
if (**Regexp == 0) return 0;
N = _totupper(**Regexp) - 48; if (N > 7) return 0;
(*Regexp)++;
A = A + N * 8;
if (**Regexp == 0) return 0;
N = _totupper(**Regexp) - 48; if (N > 7) return 0;
(*Regexp)++;
A = A + N;
#ifdef _UNICODE
Ch = (TCHAR)A;
#else // _UNICODE
Ch = (unsigned char)A;
#endif // _UNICODE
}
break;
case _T('x'):
{
unsigned int N = 0;
unsigned int A = 0;
if (**Regexp == 0) return 0;
N = _totupper(**Regexp) - 48; if (N > 9) N = N + 48 - 65 + 10; if (N > 15) return 0;
(*Regexp)++;
A = N << 4;
if (**Regexp == 0) return 0;
N = _totupper(**Regexp) - 48; if (N > 9) N = N + 48 - 65 + 10; if (N > 15) return 0;
(*Regexp)++;
A = A + N;
#ifdef _UNICODE
Ch = (TCHAR)A;
#else // _UNICODE
Ch = (unsigned char)A;
#endif // _UNICODE
}
break;
case _T('s'):
c += 4;
SETOP(set, _T('\n'));
SETOP(set, _T('\t'));
SETOP(set, _T(' '));
SETOP(set, _T('\r'));
continue;
case _T('S'):
for (xx = 0; xx <= 255; xx++) {
if (xx != _T(' ') && xx != _T('\t') && xx != _T('\n') && xx != _T('\r')) {
c++;
SETOP(set, xx);
}
}
continue;
case _T('w'):
for (xx = 0; xx <= 255; xx++) {
if (_istalnum(xx)) {
c++;
SETOP(set, xx);
}
}
break;
case _T('W'):
for (xx = 0; xx <= 255; xx++) {
if (!isalnum(xx)) {
c++;
SETOP(set, xx);
}
}
break;
case _T('d'):
for (xx = 0; xx <= 255; xx++) {
if (_istdigit(xx)) {
c++;
SETOP(set, xx);
}
}
break;
case _T('D'):
for (xx = 0; xx <= 255; xx++) {
if (!_istdigit(xx)) {
c++;
SETOP(set, xx);
}
}
break;
case _T('U'):
for (xx = _T('A'); xx <= _T('Z'); xx++) {
c++;
SETOP(set, xx);
}
continue;
case _T('L'):
for (xx = _T('a'); xx <= _T('z'); xx++) {
c++;
SETOP(set, xx);
}
continue;
}
break;
}
if (doset == 0 && ((**Regexp) == _T('-'))) {
doset = 1;
C1 = Ch;
++*Regexp;
continue;
} else if (doset == 1) {
C2 = Ch;
if (C2 < C1) return 0;
for(i = C1; i <= C2; i++) SETOP(set, i);
doset = 0;
continue;
}
c++;
SETOP(set, Ch);
}
return 0;
}
// Appends node A to the doubly linked list described by *F (first node) and
// *N (current last node).
//   - A == 0        : nothing is changed, returns 0.
//   - list empty    : A becomes both first and last node and its fPrev/fNext
//                     links are cleared.
//   - otherwise     : A is linked after *N and becomes the new *N; A's own
//                     fNext is left as the caller set it.
// Returns 1 whenever a node was added.
static int AddNode(RxNode **F, RxNode **N, RxNode *A) {
    if (!A)
        return 0;

    if (*F == 0) {
        // First node of the list.
        *F = A;
        *N = A;
        A->fNext = 0;
        A->fPrev = 0;
    } else {
        RxNode *Tail = *N;

        Tail->fNext = A;
        A->fPrev = Tail;
        *N = A;
    }
    return 1;
}
// Walks the list starting at N through the fNext links and returns how many
// nodes have an fWhat value in the range [32, 64). Nodes with any other
// fWhat value contribute nothing.
// NOTE(review): presumably [32, 64) is the band of node kinds that match
// exactly one character - confirm against the RE_* constants in regexp.h.
static int CountWidth(RxNode *N) {
    int Width = 0;

    for (RxNode *Cur = N; Cur != 0; Cur = Cur->fNext) {
        if (Cur->fWhat >= 32 && Cur->fWhat < 64)
            ++Width;
    }
    return Width;
}
static int MakeSub(RxNode **F, RxNode **N, TCHAR What) {
//_tprintf(_T("MakeSub: %c\n", What));
if (*N) {
RxNode *No;
RxNode *New;
RxNode *Jump, *Skip;
RxNode *Last = (*N);
if (Last->fWhat & RE_GROUP) {
RxNode *P = Last->fPrev;
int C = 1;
while ((C > 0) && P) {
//puts(_T("backtracking...-----"));
//RxDump(0, P);
if (P->fWhat & RE_GROUP) {
if (P->fWhat & RE_CLOSE) C++;
else C--;
}
Last = P;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -