📄 regexp.cpp
字号:
static char *regexp_id = "@(#)Copyright (C) H.Shirouzu 2005-2006 regexp.cpp ver1.31";/* ======================================================================== Project Name : Regular Expression / Wild Card Match Library Create : 2005-11-03(The) Update : 2006-01-31(Tue) Copyright : H.Shirouzu Reference : ======================================================================== */#include "tlib.h"#include "regexp.h"RegExp::RegExp(){ memset(states_tbl, 0, sizeof(states_tbl)); epsilon_tbl = NULL; end_states = 0; max_state = 0;}RegExp::~RegExp(){ Init();}void RegExp::Init(){ for (int type=NORMAL_TBL; type < MAX_STATES_TBL; type++) { if (states_tbl[type]) { for (int i=0; i < BYTE_NUM; i++) { if (states_tbl[type][i]) delete states_tbl[type][i]; } delete [] states_tbl[type]; states_tbl[type] = NULL; } } if (epsilon_tbl) { delete [] epsilon_tbl; epsilon_tbl = NULL; } max_state = 0; end_states = 0;}void RegExp::AddRegStates(StatesType type, WCHAR ch, const RegStates &state_pattern){ int idx = ch >> BITS_OF_BYTE; RegStates **&tbl = states_tbl[type]; if (!tbl) { tbl = new RegStates *[BYTE_NUM]; memset(tbl, 0, sizeof(RegStates *) * BYTE_NUM); } RegStates *&sub_tbl = tbl[idx]; if (!sub_tbl) { sub_tbl = new RegStates [BYTE_NUM]; memset(sub_tbl, 0, sizeof(RegStates) * BYTE_NUM); } sub_tbl[(u_char)ch] |= state_pattern;}RegStates RegExp::GetRegStates(StatesType type, WCHAR ch){ RegStates **&tbl = states_tbl[type]; RegStates *sub_tbl = tbl ? tbl[ch >> BITS_OF_BYTE] : NULL; return sub_tbl ? sub_tbl[(u_char)ch] : 0;}void RegExp::AddEpStates(int state, const RegStates &add_states){ if (!epsilon_tbl) { epsilon_tbl = new RegStates [sizeof(RegStates)][BYTE_NUM]; memset(epsilon_tbl, 0, sizeof(RegStates) * sizeof(RegStates) * BYTE_NUM); } RegStates *tbl = epsilon_tbl[state / BITS_OF_BYTE]; int bit = 1 << (state % BITS_OF_BYTE); for (int i=0; i < BYTE_NUM; i++) { if (i & bit) tbl[i] |= add_states; }}RegStates RegExp::GetEpStates(RegStates cur_states){ RegStates ret_states = 0; for (int i=0; cur_states; i++) { ret_states |= epsilon_tbl[i][cur_states & 0xff]; cur_states >>= BITS_OF_BYTE; } return ret_states;}void RegExp::AddRegStatesEx(StatesType type, WCHAR ch, RegExp::CaseSense cs){ if (cs == CASE_SENSE) AddRegStates(type, ch, (RegStates)1 << (max_state + 1)); else { AddRegStates(type, (WCHAR)CharLowerV((void *)ch), (RegStates)1 << (max_state + 1)); AddRegStates(type, (WCHAR)CharUpperV((void *)ch), (RegStates)1 << (max_state + 1)); }}BOOL RegExp::RegisterWildCard(const void *wild_str, RegExp::CaseSense cs){ if (max_state + lstrlenV(wild_str) > sizeof(RegStates) * BITS_OF_BYTE - 2) return FALSE; // start & end 偺俀忬懺傪彍偄偨巆傝 AddEpStates(0, (RegStates)1 << ++max_state); enum Mode { NORMAL, CHARCLASS } mode = NORMAL; enum SubMode { DEFAULT, CC_START, CC_NORMAL, CC_RANGE } submode = DEFAULT; int start_state = max_state, escape = 0; StatesType type = NORMAL_TBL; WCHAR ch = 0, last_ch, start_ch, end_ch; do { last_ch = ch; ch = lGetCharIncV(&wild_str); // 0 傕暥枛敾掕暥帤偲偟偰搊榐 switch (escape) { case 1: escape = 2; break; case 2: escape = 0; break; default: if (ch == '\\') { escape = 1; continue; } } if (mode == NORMAL) { if (!escape) { switch (ch) { case '[': mode = CHARCLASS; submode = CC_START; continue; case '?': AddEpStates(max_state, (RegStates)1 << (max_state + 1)); max_state++; continue; case '*': AddEpStates(max_state, (RegStates)1 << (max_state + 0)); continue; } } if (ch || escape || last_ch != '*') { // '*' 偱廔椆偟偨応崌偼 '\0' AddRegStatesEx(type, ch, cs); // 傑偱妋擣偣偢偵敾掕廔椆偝偣傞 max_state++; } } else { // CHARCLASS mode if (submode == CC_START) { submode = CC_NORMAL; end_ch = 0; if ((ch == '^' || ch == '!') && !escape) { type = REV_TBL; continue; } } else if (submode == CC_RANGE) { while (start_ch <= ch) { AddRegStatesEx(type, start_ch, cs); start_ch++; } if (type == REV_TBL) AddEpStates(max_state, (RegStates)1 << (max_state + 1)); submode = CC_NORMAL; continue; } if (ch == ']' && !escape) { mode = NORMAL; type = NORMAL_TBL; max_state++; continue; } if (ch == '-' && !escape) { submode = CC_RANGE; start_ch = end_ch; continue; } AddRegStatesEx(type, end_ch = ch, cs); if (type == REV_TBL) AddEpStates(max_state, (RegStates)1 << (max_state + 1)); } } while (ch); end_states |= (RegStates)1 << max_state; return TRUE;}BOOL RegExp::IsMatch(const void *target){ if (!epsilon_tbl) return FALSE; RegStates total_states = GetEpStates(1); while ((total_states & end_states) == 0 && total_states) { WCHAR ch = lGetCharIncV(&target); // 0 偼暥枛敾掕暥帤偲偟偰棙梡 total_states = ((total_states << 1) & GetRegStates(NORMAL_TBL, ch)) | (GetEpStates(total_states) & ~GetRegStates(REV_TBL, ch)); if (!ch) break; } return (total_states & end_states) ? TRUE : FALSE;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -