📄 cregexp.cpp
字号:
//
// Copyright (c) Cail Lomecb (Igor Ruskih) 1999-2000 <ruiv@uic.nnov.ru>
// You can use, modify, distribute this code or any other part
// of colorer library in sources or in binaries only according
// to Colorer License (see /doc/license.txt for more information).
//
#include "stdafx.h"
#include "cregexp.h"
//Up: /[A-Z \x80-\x9f \xf0 ]/x//Lo: /[a-z \xa0-\xaf \xe0-\xef \xf1 ]/x//Wd: /[\d _ A-Z a-z \xa0-\xaf \xe0-\xf1 \x80-\x9f]/x/* // koi8SCharData UCData = {0x0, 0x0, 0x7fffffe, 0x0, 0x0, 0x80000, 0x0, 0xffffffff}, LCData = {0x0, 0x0, 0x0, 0x7fffffe, 0x0, 0x8, 0xffffffff, 0x0}, WdData = {0x0, 0x3ff0000, 0x87fffffe, 0x7fffffe, 0x0, 0x80008, 0xffffffff, 0xffffffff}, DigData = {0x0, 0x3ff0000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};//*///*/ dos866SCharData UCData = {0x0, 0x0, 0x7fffffe, 0x0, 0xffffffff, 0x0, 0x0, 0x10000}, LCData = {0x0, 0x0, 0x0, 0x7fffffe, 0x0, 0xffff, 0x0, 0x2ffff}, WdData = {0x0, 0x3ff0000, 0x87fffffe, 0x7fffffe, 0xffffffff, 0xffff, 0x0, 0x3ffff}, DigData = {0x0, 0x3ff0000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};/*/ // cp1251SCharData UCData = {0x0, 0x0, 0x7fffffe, 0x0, 0x0, 0x100, 0xffffffff, 0x0}, LCData = {0x0, 0x0, 0x0, 0x7fffffe, 0x0, 0x1000000, 0x0, 0xffffffff}, WdData = {0x0, 0x3ff0000, 0x87fffffe, 0x7fffffe, 0x0, 0x1000100, 0xffffffff, 0xffffffff}, DigData = {0x0, 0x3ff0000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};//*///////////////////////////////////////////////////////////////////////////////*void *operator new(size_t sz){ return malloc(sz);};void operator delete(void *v){ free(v);};void *operator new[](size_t sz){ return malloc(sz);};void operator delete[](void *v){ free(v);};//*/bool inline IsDigit(char c){ return DigData.GetBit(c);};bool inline IsWord(char c){ return WdData.GetBit(c);};bool inline IsUpperCase(char c){ return UCData.GetBit(c);};bool inline IsLowerCase(char c){ return LCData.GetBit(c);};char inline LowCase(char c){ if (UCData.GetBit(c)) return c + 0x20; return c;};int GetNumber(int *str, int s, int e){int r = 1, num = 0; if (e < s) return -1; for(int i = e-1; i >= s; i--){ if (str[i] > '9' || str[i] < '0') return -1; num += (str[i] - 0x30)*r; r *= 10; }; return num;};int GetHex(char c){ c = LowCase(c); c -= 0x30; if (c >= 0x31 && c <= 0x36) c -= 0x27; else if (c < 0 || c > 9) return -1; return 
c;};///////////////////////////////////////////////////////////////////////////////SRegInfo::SRegInfo(){ Next = Parent = 0; un.Param = 0; Op = ReEmpty;};SRegInfo::~SRegInfo(){ if (Next) delete Next; if (un.Param) switch(Op){ case ReEnum: case ReNEnum: delete un.ChrClass; break; default: if (Op > ReBlockOps && Op < ReSymbolOps || Op == ReBrackets) delete un.Param; break; };};////////////////////////////////////////////////////////////////////////////// bitsvoid SCharData::SetBit(unsigned char Bit){ int p = Bit/8; CArr[p] |= (1 << Bit%8);};void SCharData::ClearBit(unsigned char Bit){ int p = Bit/8; CArr[p] &= ~(1 << Bit%8);};bool SCharData::GetBit(unsigned char Bit){ int p = (unsigned char)Bit/8; return (CArr[p] & (1 << Bit%8))!=0;};////////////////////////////////////////////////////////////////////////////// regexp classCRegExp::CRegExp(){ Info = 0; Exprn = 0; NoMoves = false; Error = EERROR; FirstChar = 0; CurMatch = 0; CodePage = 0;};CRegExp::CRegExp(char *Text){ Info = 0; Exprn = 0; NoMoves = false; Error = EERROR; FirstChar = 0; CurMatch = 0; CodePage = 0; if (Text) SetExpr(Text);};CRegExp::~CRegExp(){ if (Info) delete Info;};bool CRegExp::SetExpr(LPCSTR Expr){ if (!this) return false; Error = EERROR; CurMatch = 0; Error = SetExprLow(Expr); return Error == EOK;};bool CRegExp::isok(){ return Error == EOK;};EError CRegExp::geterror(){ return Error;};EError CRegExp::SetExprLow(const char *Expr){int EnterBr = 0, EnterGr = 0, EnterFg = 0;int pos, tmp, i, j, s = 0;bool Ok = false;int Len = 0; while (Expr[Len]) Len++; if (!Len) return EERROR; if (Info) delete Info; Info = new SRegInfo; Exprn = new int[Len]; NoCase = false; Extend = false; if (Expr && Expr[0] == '/') s++; else return ESYNTAX; for (i = Len; i > 0 && !Ok;i--) if (Expr[i] == '/'){ Len = i-s; Ok = true; for (int j = i+1; Expr[j]; j++){ if (Expr[j] == 'i') NoCase = true; if (Expr[j] == 'x') Extend = true; }; }; if (!Ok) return ESYNTAX; // for (j = 0,pos = 0; j < Len; j++,pos++){ if (Extend && (Expr[j+s] == 
' ' || Expr[j+s] == '\n'|| Expr[j+s] == '\r')){ pos--; continue; }; Exprn[pos] = (int)(unsigned char)Expr[j+s]; if (Expr[j+s] == BACKSLASH){ switch (Expr[j+s+1]){ case 'd': Exprn[pos] = ReDigit; break; case 'D': Exprn[pos] = ReNDigit; break; case 'w': Exprn[pos] = ReWordSymb; break; case 'W': Exprn[pos] = ReNWordSymb; break; case 's': Exprn[pos] = ReWSpace; break; case 'S': Exprn[pos] = ReNWSpace; break; case 'u': Exprn[pos] = ReUCase; break; case 'l': Exprn[pos] = ReNUCase; break; case 't': Exprn[pos] = '\t'; break; case 'n': Exprn[pos] = '\n'; break; case 'r': Exprn[pos] = '\r'; break; case 'b': Exprn[pos] = ReWBound; break; case 'B': Exprn[pos] = ReNWBound; break; case 'c': Exprn[pos] = RePreNW; break; case 'm': Exprn[pos] = ReStart; break; case 'M': Exprn[pos] = ReEnd; break; case 'x': tmp = GetHex(Expr[j+s+2]); if (tmp == -1 || GetHex(Expr[j+s+3]) == -1) return ESYNTAX; tmp = (tmp<<4) + GetHex(Expr[j+s+3]); Exprn[pos] = tmp; j += 2; break; case 'y': tmp = GetHex(Expr[j+s+2]); if (tmp == -1) return ESYNTAX; Exprn[pos] = ReBkTrace + tmp; j++; break; default: tmp = GetHex(Expr[j+s+1]); if (tmp != -1){ Exprn[pos] = ReBkBrack + tmp; break; }else Exprn[pos] = Expr[j+s+1]; break; }; j++; continue; }; if (Expr[j+s] == ']'){ Exprn[pos] = ReEnumE; if (EnterFg || !EnterGr) return EBRACKETS; EnterGr--; }; if (Expr[j+s] == '-' && EnterGr) Exprn[pos] = ReFrToEnum; if (EnterGr) continue; if (Expr[j+s] == '[' && Expr[j+s+1] == '^'){ Exprn[pos] = ReNEnumS; if (EnterFg) return EBRACKETS; EnterGr++; j++; continue; }; if (Expr[j+s] == '*' && Expr[j+s+1] == '?'){ Exprn[pos] = ReNGMul; j++; continue; }; if (Expr[j+s] == '+' && Expr[j+s+1] == '?'){ Exprn[pos] = ReNGPlus; j++; continue; }; if (Expr[j+s] == '?' && Expr[j+s+1] == '?'){ Exprn[pos] = ReNGQuest; j++; continue; }; if (Expr[j+s] == '?' && Expr[j+s+1] == '#' && Expr[j+s+2]>='0' && Expr[j+s+2]<='9'){ Exprn[pos] = ReBehind+Expr[j+s+2]-0x30; j+=2; continue; }; if (Expr[j+s] == '?' 
&& Expr[j+s+1] == '~' && Expr[j+s+2]>='0' && Expr[j+s+2]<='9'){ Exprn[pos] = ReNBehind+Expr[j+s+2]-0x30; j+=2; continue; }; if (Expr[j+s] == '?' && Expr[j+s+1] == '='){ Exprn[pos] = ReAhead; j++; continue; }; if (Expr[j+s] == '?' && Expr[j+s+1] == '!'){ Exprn[pos] = ReNAhead; j++; continue; }; if (Expr[j+s] == '('){ Exprn[pos] = ReLBrack; if (EnterFg) return EBRACKETS; EnterBr++; }; if (Expr[j+s] == ')'){ Exprn[pos] = ReRBrack; if (!EnterBr || EnterFg) return EBRACKETS; EnterBr--; }; if (Expr[j+s] == '['){ Exprn[pos] = ReEnumS; if (EnterFg) return EBRACKETS; EnterGr++; }; if (Expr[j+s] == '{'){ Exprn[pos] = ReRangeS; if (EnterFg) return EBRACKETS; EnterFg++; }; if (Expr[j+s] == '}' && Expr[j+s+1] == '?'){ Exprn[pos] = ReNGRangeE; if (!EnterFg) return EBRACKETS; EnterFg--; j++; continue; }; if (Expr[j+s] == '}'){ Exprn[pos] = ReRangeE; if (!EnterFg) return EBRACKETS; EnterFg--; }; if (Expr[j+s] == '^') Exprn[pos] = ReSoL; if (Expr[j+s] == '$') Exprn[pos] = ReEoL; if (Expr[j+s] == '.') Exprn[pos] = ReAnyChr; if (Expr[j+s] == '*') Exprn[pos] = ReMul; if (Expr[j+s] == '+') Exprn[pos] = RePlus; if (Expr[j+s] == '?') Exprn[pos] = ReNGQuest;//ReQuest; if (Expr[j+s] == '|') Exprn[pos] = ReOr; }; if (EnterGr || EnterBr || EnterFg) return EBRACKETS; Info->Op = ReBrackets; Info->un.Param = new SRegInfo; Info->s = CurMatch++; EError err = SetStructs(Info->un.Param,0,pos); delete Exprn; if (err) return err; Optimize(); return EOK;};void CRegExp::Optimize(){PRegInfo Next = Info; FirstChar = 0; while(Next){ if (Next->Op == ReBrackets || Next->Op == RePlus || Next->Op == ReNGPlus){ Next = Next->un.Param; continue; }; if (Next->Op == ReSymb){ if (Next->un.Symb & 0xFF00 && Next->un.Symb != ReSoL && Next->un.Symb != ReWBound) break; FirstChar = Next->un.Symb; break; }; break; };};EError CRegExp::SetStructs(PRegInfo &re,int start,int end){PRegInfo Next,Prev,Prev2;int comma,st,en,ng,i, j,k;int EnterBr;bool Add; if (end - start < 0) return EERROR; Next = re; for (i = start; i < end; 
i++){ Add = false; // Ops if (Exprn[i] > ReBlockOps && Exprn[i] < ReSymbolOps){ Next->un.Param = 0; Next->Op = (EOps)Exprn[i]; Add = true; }; // {n,m} if (Exprn[i] == ReRangeS){ st = i; en = -1; comma = -1; ng = 0; for (j = i;j < end;j++){ if (Exprn[j] == ReNGRangeE){ en = j; ng = 1; break; }; if (Exprn[j] == ReRangeE){ en = j; break; }; if ((char)Exprn[j] == ',') comma = j; }; if (en == -1) return EBRACKETS; if (comma == -1) comma = en; Next->s = GetNumber(Exprn,st+1,comma); if (comma != en) Next->e = GetNumber(Exprn,comma+1,en); else Next->e = Next->s; Next->un.Param = 0; Next->Op = ng?ReNGRangeNM:ReRangeNM; if (en-comma == 1){ Next->e = -1; Next->Op = ng?ReNGRangeN:ReRangeN; }; i=j; Add = true; }; // [] [^] if (Exprn[i] == ReEnumS || Exprn[i] == ReNEnumS){ Next->Op = (Exprn[i] == ReEnumS)?ReEnum:ReNEnum; for (j = i+1;j < end;j++){ if (Exprn[j] == ReEnumE) break; }; if (j == end) return EBRACKETS; Next->un.ChrClass = new SCharData; for(k = 0; k < 8; k++) Next->un.ChrClass->IArr[k] = 0x0; for (j = i+1;Exprn[j] != ReEnumE;j++){ if (Exprn[j+1] == ReFrToEnum){ for (i = (Exprn[j]&0xFF); i < (Exprn[j+2]&0xFF);i++) Next->un.ChrClass->SetBit(i&0xFF); j++; continue; }; switch(Exprn[j]){ case ReDigit: for (k = 0x30;k < 0x40;k++) if (IsDigit((char)k)) Next->un.ChrClass->SetBit(k); break;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -