📄 regexp.cpp
字号:
//Please refer to http://dansguardian.org/?page=copyright2//for the license for this code.//Written by Daniel Barron (daniel@// jadeb.com).//For support go to http://groups.yahoo.com/group/dansguardian// This program is free software; you can redistribute it and/or modify// it under the terms of the GNU General Public License as published by// the Free Software Foundation; either version 2 of the License, or// (at your option) any later version.//// This program is distributed in the hope that it will be useful,// but WITHOUT ANY WARRANTY; without even the implied warranty of// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the// GNU General Public License for more details.//// You should have received a copy of the GNU General Public License// along with this program; if not, write to the Free Software// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA#include "RegExp.hpp"#include <iostream>RegExp::RegExp():imatched(false),wascompiled(false) {}RegExp::~RegExp() { if (wascompiled) { regfree(®); }}RegExp::RegExp(const RegExp& r) { results.clear(); offsets.clear(); lengths.clear(); unsigned int i; for(i = 0; i < r.results.size(); i++) { results.push_back(r.results[i]); } for(i = 0; i < r.offsets.size(); i++) { offsets.push_back(r.offsets[i]); } for(i = 0; i < r.lengths.size(); i++) { lengths.push_back(r.lengths[i]); } imatched = r.imatched; wascompiled = r.wascompiled; searchstring = r.searchstring; if (wascompiled == true) { if (regcomp(®, searchstring.c_str(), REG_ICASE | REG_EXTENDED)) { regfree(®); imatched = false; wascompiled = false; } }}bool RegExp::comp(const char* exp) { if (wascompiled) { regfree(®); wascompiled = false; } results.clear(); offsets.clear(); lengths.clear(); imatched = false; if (regcomp(®, exp, REG_ICASE | REG_EXTENDED)) { // compile regex regfree(®); return false; // need exception? } wascompiled = true; searchstring = exp; return true;}bool RegExp::match(const char* text) { if (!wascompiled) { return false; // need exception? } char* pos = (char*)text; int i; results.clear(); offsets.clear(); lengths.clear(); imatched = false; regmatch_t *pmatch; pmatch = new regmatch_t[reg.re_nsub + 1]; // to hold result if (!pmatch) { // if it failed delete[] pmatch; imatched = false; return false; // exception? } if (regexec(®, pos, reg.re_nsub + 1, pmatch, 0)) { // run regex delete[] pmatch; imatched = false;// #ifdef DGDEBUG// std::cout << "no match for:" << searchstring << std::endl;// #endif return false; // if no match } size_t matchlen; char* submatch; unsigned int largestoffset; int error = 0; while (error == 0) { largestoffset = 0; for (i = 0; i <= (signed)reg.re_nsub; i++) { if (pmatch[i].rm_so != -1) { matchlen = pmatch[i].rm_eo - pmatch[i].rm_so; submatch = new char[matchlen + 1]; strncpy(submatch, pos + pmatch[i].rm_so, matchlen); submatch[matchlen] = '\0'; results.push_back(std::string(submatch)); offsets.push_back(pmatch[i].rm_so + (pos - text)); lengths.push_back(matchlen); delete[] submatch; if ((pmatch[i].rm_so + matchlen) > largestoffset) { largestoffset = pmatch[i].rm_so + matchlen; } } } if (largestoffset > 0) { pos += largestoffset; error = regexec(®, pos, reg.re_nsub + 1, pmatch, REG_NOTBOL); } else { error = -1; } } imatched = true; delete[] pmatch; #ifdef DGDEBUG std::cout << "match(s) for:" << searchstring << std::endl; #endif return true; // match(s) found}std::string RegExp::result(int i) { if (i >= (signed)results.size() || i < 0) { // reality check return ""; // maybe exception? } return results[i];}unsigned int RegExp::offset(int i) { if (i >= (signed)offsets.size() || i < 0) { // reality check return 0; // maybe exception? } return offsets[i];}unsigned int RegExp::length(int i) { if (i >= (signed)lengths.size() || i < 0) { // reality check return 0; // maybe exception? } return lengths[i];}int RegExp::numberOfMatches() { int i = (signed)results.size(); return i;}bool RegExp::matched() { return imatched; // regexp matches only}// My own version of STL::search() which seems to be 5-6 times fasterchar* RegExp::search(char* file, char* fileend, char* phrase, char* phraseend) { int j, l; // counters int p; // to hold precalcuated value for speed bool match; // flag int qsBc[256]; // Quick Search Boyer Moore shift table (256 alphabet) char* k; // pointer used in matching int pl = phraseend - phrase; // phrase length int fl = (int)(fileend - file) - pl; // file length that could match if (fl < pl) return fileend; // reality checking if (pl > 126) return fileend; // reality checking // For speed we append the phrase to the end of the memory block so it // is always found, thus eliminating some checking. This is possible as // we know an extra 127 bytes have been provided by NaughtyFilter.cpp // and also the OptionContainer does not allow phrase lengths greater // than 126 chars for(j = 0; j < pl; j++) { fileend[j] = phrase[j]; } // Next we need to make the Quick Search Boyer Moore shift table p = pl + 1; for (j = 0; j < 256; j++) { // Preprocessing qsBc[j] = p; } for (j = 0; j < pl; j++) { // Preprocessing qsBc[(unsigned char)phrase[j]] = pl - j; } // Now do the searching! for(j = 0;;) { k = file + j; match = true; for (l = 0; l < pl; l++) { // quiv, but faster, memcmp() if (k[l] != phrase[l]) { match = false; break; } } if (match) { return (j + file); // match found at offset j (but could be the // copy put at fileend) } j += qsBc[(unsigned char)file[j + pl]]; // shift } return fileend; // should never get here as it should always match}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -