⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cregexp.cpp

📁 一个邮件客户端源代码,包括收发邮件,安排日程等很多内容
💻 CPP
📖 第 1 页 / 共 2 页
字号:
////  Copyright (c) Cail Lomecb (Igor Ruskih) 1999-2000 <ruiv@uic.nnov.ru>//  You can use, modify, distribute this code or any other part//  of colorer library in sources or in binaries only according//  to Colorer License (see /doc/license.txt for more information).//#include "stdafx.h"#include "cregexp.h"
//Up: /[A-Z \x80-\x9f \xf0 ]/x//Lo: /[a-z \xa0-\xaf \xe0-\xef \xf1 ]/x//Wd: /[\d _ A-Z a-z \xa0-\xaf \xe0-\xf1 \x80-\x9f]/x/*   // koi8SCharData UCData  = {0x0, 0x0, 0x7fffffe, 0x0, 0x0, 0x80000, 0x0, 0xffffffff},          LCData  = {0x0, 0x0, 0x0, 0x7fffffe, 0x0, 0x8, 0xffffffff, 0x0},          WdData  = {0x0, 0x3ff0000, 0x87fffffe, 0x7fffffe, 0x0, 0x80008, 0xffffffff, 0xffffffff},          DigData = {0x0, 0x3ff0000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};//*///*/ dos866SCharData UCData  = {0x0, 0x0, 0x7fffffe, 0x0, 0xffffffff, 0x0, 0x0, 0x10000},          LCData  = {0x0, 0x0, 0x0, 0x7fffffe, 0x0, 0xffff, 0x0, 0x2ffff},          WdData  = {0x0, 0x3ff0000, 0x87fffffe, 0x7fffffe, 0xffffffff, 0xffff, 0x0, 0x3ffff},          DigData = {0x0, 0x3ff0000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};/*/   // cp1251SCharData UCData  = {0x0, 0x0, 0x7fffffe, 0x0, 0x0, 0x100, 0xffffffff, 0x0},          LCData  = {0x0, 0x0, 0x0, 0x7fffffe, 0x0, 0x1000000, 0x0, 0xffffffff},          WdData  = {0x0, 0x3ff0000, 0x87fffffe, 0x7fffffe, 0x0, 0x1000100, 0xffffffff, 0xffffffff},          DigData = {0x0, 0x3ff0000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};//*///////////////////////////////////////////////////////////////////////////////*void *operator new(size_t sz){  return malloc(sz);};void operator delete(void *v){  free(v);};void *operator new[](size_t sz){  return malloc(sz);};void operator delete[](void *v){  free(v);};//*/bool inline IsDigit(char c){  return DigData.GetBit(c);};bool inline IsWord(char c){  return WdData.GetBit(c);};bool inline IsUpperCase(char c){  return UCData.GetBit(c);};bool inline IsLowerCase(char c){  return LCData.GetBit(c);};char inline LowCase(char c){  if (UCData.GetBit(c))    return c + 0x20;  return c;};int GetNumber(int *str, int s, int e){int r = 1, num = 0;  if (e < s) return -1;  for(int i = e-1; i >= s; i--){    if (str[i] > '9' || str[i] < '0') return -1;    num += (str[i] - 0x30)*r;    r *= 10;  };  return num;};int GetHex(char c){  c = LowCase(c);  c -= 0x30;  if (c >= 0x31 && c <= 0x36) c -= 0x27;  else if (c < 0 || c > 9) return -1;  return c;};///////////////////////////////////////////////////////////////////////////////SRegInfo::SRegInfo(){  Next = Parent = 0;  un.Param = 0;  Op = ReEmpty;};SRegInfo::~SRegInfo(){  if (Next) delete Next;  if (un.Param)    switch(Op){      case ReEnum:      case ReNEnum:        delete un.ChrClass;        break;      default:        if (Op > ReBlockOps && Op < ReSymbolOps || Op == ReBrackets)          delete un.Param;        break;    };};////////////////////////////////////////////////////////////////////////////// bitsvoid SCharData::SetBit(unsigned char Bit){  int p = Bit/8;  CArr[p] |= (1 << Bit%8);};void SCharData::ClearBit(unsigned char Bit){  int p = Bit/8;  CArr[p] &= ~(1 << Bit%8);};bool SCharData::GetBit(unsigned char Bit){  int p = (unsigned char)Bit/8;  return (CArr[p] & (1 << Bit%8))!=0;};////////////////////////////////////////////////////////////////////////////// regexp classCRegExp::CRegExp(){  Info = 0;  Exprn = 0;  NoMoves = false;  Error = EERROR;  FirstChar = 0;  CurMatch = 0;  CodePage = 0;};CRegExp::CRegExp(char *Text){  Info = 0;  Exprn = 0;  NoMoves = false;  Error = EERROR;  FirstChar = 0;  CurMatch = 0;  CodePage = 0;  if (Text) SetExpr(Text);};CRegExp::~CRegExp(){  if (Info) delete Info;};bool CRegExp::SetExpr(LPCSTR Expr){  if (!this) return false;  Error = EERROR;  CurMatch = 0;  Error = SetExprLow(Expr);  return Error == EOK;};bool CRegExp::isok(){  return Error == EOK;};EError CRegExp::geterror(){  return Error;};EError CRegExp::SetExprLow(const char *Expr){int  EnterBr = 0, EnterGr = 0, EnterFg = 0;int  pos, tmp, i, j, s = 0;bool Ok = false;int  Len = 0;  while (Expr[Len]) Len++;  if (!Len) return EERROR;  if (Info) delete Info;  Info = new SRegInfo;  Exprn = new int[Len];  NoCase = false;  Extend = false;  if (Expr && Expr[0] == '/') s++;  else return ESYNTAX;  for (i = Len; i > 0 && !Ok;i--)    if (Expr[i] == '/'){      Len = i-s;      Ok = true;      for (int j = i+1; Expr[j]; j++){        if (Expr[j] == 'i') NoCase = true;        if (Expr[j] == 'x') Extend = true;      };    };  if (!Ok) return ESYNTAX;  //  for (j = 0,pos = 0; j < Len; j++,pos++){    if (Extend && (Expr[j+s] == ' ' || Expr[j+s] == '\n'|| Expr[j+s] == '\r')){      pos--;      continue;    };    Exprn[pos] = (int)(unsigned char)Expr[j+s];    if (Expr[j+s] == BACKSLASH){      switch (Expr[j+s+1]){        case 'd':          Exprn[pos] = ReDigit;          break;        case 'D':          Exprn[pos] = ReNDigit;          break;        case 'w':          Exprn[pos] = ReWordSymb;          break;        case 'W':          Exprn[pos] = ReNWordSymb;          break;        case 's':          Exprn[pos] = ReWSpace;          break;        case 'S':          Exprn[pos] = ReNWSpace;          break;        case 'u':          Exprn[pos] = ReUCase;          break;        case 'l':          Exprn[pos] = ReNUCase;          break;        case 't':          Exprn[pos] = '\t';          break;        case 'n':          Exprn[pos] = '\n';          break;        case 'r':          Exprn[pos] = '\r';          break;        case 'b':          Exprn[pos] = ReWBound;          break;        case 'B':          Exprn[pos] = ReNWBound;          break;        case 'c':          Exprn[pos] = RePreNW;          break;        case 'm':          Exprn[pos] = ReStart;          break;        case 'M':          Exprn[pos] = ReEnd;          break;        case 'x':          tmp = GetHex(Expr[j+s+2]);          if (tmp == -1 || GetHex(Expr[j+s+3]) == -1) return ESYNTAX;          tmp = (tmp<<4) + GetHex(Expr[j+s+3]);          Exprn[pos] = tmp;          j += 2;          break;        case 'y':          tmp = GetHex(Expr[j+s+2]);          if (tmp == -1) return ESYNTAX;          Exprn[pos] = ReBkTrace + tmp;          j++;          break;        default:          tmp = GetHex(Expr[j+s+1]);          if (tmp != -1){            Exprn[pos] = ReBkBrack + tmp;            break;          }else            Exprn[pos] = Expr[j+s+1];          break;      };      j++;      continue;    };    if (Expr[j+s] == ']'){      Exprn[pos] = ReEnumE;      if (EnterFg || !EnterGr) return EBRACKETS;      EnterGr--;    };    if (Expr[j+s] == '-' && EnterGr) Exprn[pos] = ReFrToEnum;    if (EnterGr) continue;    if (Expr[j+s] == '[' && Expr[j+s+1] == '^'){      Exprn[pos] = ReNEnumS;      if (EnterFg) return EBRACKETS;      EnterGr++;      j++;      continue;    };    if (Expr[j+s] == '*' && Expr[j+s+1] == '?'){      Exprn[pos] = ReNGMul;      j++;      continue;    };    if (Expr[j+s] == '+' && Expr[j+s+1] == '?'){      Exprn[pos] = ReNGPlus;      j++;      continue;    };    if (Expr[j+s] == '?' && Expr[j+s+1] == '?'){      Exprn[pos] = ReNGQuest;      j++;      continue;    };    if (Expr[j+s] == '?' && Expr[j+s+1] == '#' &&        Expr[j+s+2]>='0' && Expr[j+s+2]<='9'){      Exprn[pos] = ReBehind+Expr[j+s+2]-0x30;      j+=2;      continue;    };    if (Expr[j+s] == '?' && Expr[j+s+1] == '~' &&        Expr[j+s+2]>='0' && Expr[j+s+2]<='9'){      Exprn[pos] = ReNBehind+Expr[j+s+2]-0x30;      j+=2;      continue;    };    if (Expr[j+s] == '?' && Expr[j+s+1] == '='){      Exprn[pos] = ReAhead;      j++;      continue;    };    if (Expr[j+s] == '?' && Expr[j+s+1] == '!'){      Exprn[pos] = ReNAhead;      j++;      continue;    };    if (Expr[j+s] == '('){      Exprn[pos] = ReLBrack;      if (EnterFg) return EBRACKETS;      EnterBr++;    };    if (Expr[j+s] == ')'){      Exprn[pos] = ReRBrack;      if (!EnterBr || EnterFg) return EBRACKETS;      EnterBr--;    };    if (Expr[j+s] == '['){      Exprn[pos] = ReEnumS;      if (EnterFg) return EBRACKETS;      EnterGr++;    };    if (Expr[j+s] == '{'){      Exprn[pos] = ReRangeS;      if (EnterFg) return EBRACKETS;      EnterFg++;    };    if (Expr[j+s] == '}' && Expr[j+s+1] == '?'){      Exprn[pos] = ReNGRangeE;      if (!EnterFg) return EBRACKETS;      EnterFg--;      j++;      continue;    };    if (Expr[j+s] == '}'){      Exprn[pos] = ReRangeE;      if (!EnterFg) return EBRACKETS;      EnterFg--;    };    if (Expr[j+s] == '^') Exprn[pos] = ReSoL;    if (Expr[j+s] == '$') Exprn[pos] = ReEoL;    if (Expr[j+s] == '.') Exprn[pos] = ReAnyChr;    if (Expr[j+s] == '*') Exprn[pos] = ReMul;    if (Expr[j+s] == '+') Exprn[pos] = RePlus;    if (Expr[j+s] == '?') Exprn[pos] = ReNGQuest;//ReQuest;    if (Expr[j+s] == '|') Exprn[pos] = ReOr;  };  if (EnterGr || EnterBr || EnterFg) return EBRACKETS;  Info->Op = ReBrackets;  Info->un.Param = new SRegInfo;  Info->s = CurMatch++;  EError err = SetStructs(Info->un.Param,0,pos);  delete Exprn;  if (err) return err;  Optimize();  return EOK;};void CRegExp::Optimize(){PRegInfo Next = Info;  FirstChar = 0;  while(Next){    if (Next->Op == ReBrackets || Next->Op == RePlus  || Next->Op == ReNGPlus){      Next = Next->un.Param;      continue;    };    if (Next->Op == ReSymb){      if (Next->un.Symb & 0xFF00 &&  Next->un.Symb != ReSoL && Next->un.Symb != ReWBound)        break;      FirstChar = Next->un.Symb;      break;    };    break;  };};EError CRegExp::SetStructs(PRegInfo &re,int start,int end){PRegInfo Next,Prev,Prev2;int comma,st,en,ng,i, j,k;int EnterBr;bool Add;  if (end - start < 0) return EERROR;  Next = re;  for (i = start; i < end; i++){    Add = false;    // Ops    if (Exprn[i] > ReBlockOps && Exprn[i] < ReSymbolOps){      Next->un.Param = 0;      Next->Op = (EOps)Exprn[i];      Add = true;    };    // {n,m}    if (Exprn[i] == ReRangeS){      st = i;      en = -1;      comma = -1;      ng = 0;      for (j = i;j < end;j++){        if (Exprn[j] == ReNGRangeE){          en = j;          ng = 1;          break;        };        if (Exprn[j] == ReRangeE){          en = j;          break;        };        if ((char)Exprn[j] == ',')          comma = j;      };      if (en == -1) return EBRACKETS;      if (comma == -1) comma = en;      Next->s = GetNumber(Exprn,st+1,comma);      if (comma != en)        Next->e = GetNumber(Exprn,comma+1,en);      else        Next->e = Next->s;      Next->un.Param = 0;      Next->Op = ng?ReNGRangeNM:ReRangeNM;      if (en-comma == 1){        Next->e = -1;        Next->Op = ng?ReNGRangeN:ReRangeN;      };      i=j;      Add = true;    };    // [] [^]    if (Exprn[i] == ReEnumS || Exprn[i] == ReNEnumS){      Next->Op = (Exprn[i] == ReEnumS)?ReEnum:ReNEnum;      for (j = i+1;j < end;j++){        if (Exprn[j] == ReEnumE)          break;      };      if (j == end) return EBRACKETS;      Next->un.ChrClass = new SCharData;      for(k = 0; k < 8; k++)        Next->un.ChrClass->IArr[k] = 0x0;      for (j = i+1;Exprn[j] != ReEnumE;j++){        if (Exprn[j+1] == ReFrToEnum){          for (i = (Exprn[j]&0xFF); i < (Exprn[j+2]&0xFF);i++)            Next->un.ChrClass->SetBit(i&0xFF);          j++;          continue;        };        switch(Exprn[j]){          case ReDigit:            for (k = 0x30;k < 0x40;k++)              if (IsDigit((char)k))                Next->un.ChrClass->SetBit(k);            break;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -