📄 naughtyfilter.cpp
字号:
//Please refer to http://dansguardian.org/?page=copyright2//for the license for this code.//Written by Daniel Barron (daniel@//jadeb/.com).//For support go to http://groups.yahoo.com/group/dansguardian// This program is free software; you can redistribute it and/or modify// it under the terms of the GNU General Public License as published by// the Free Software Foundation; either version 2 of the License, or// (at your option) any later version.//// This program is distributed in the hope that it will be useful,// but WITHOUT ANY WARRANTY; without even the implied warranty of// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the// GNU General Public License for more details.//// You should have received a copy of the GNU General Public License// along with this program; if not, write to the Free Software// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA#include <syslog.h>#include <algorithm>#include "NaughtyFilter.hpp"#include "RegExp.hpp"#include "ListContainer.hpp"#include "DataBuffer.hpp"#include <deque>extern OptionContainer o;using namespace std;NaughtyFilter::NaughtyFilter() // set up defaults:isItNaughty(false),isException(false),filtergroup(0),whatIsNaughty(""),whatIsNaughtyLog(""){}void NaughtyFilter::checkme(DataBuffer* body) { int bodylen = (*body).buffer_length; char* rawbody = (*body).data; #ifdef DGDEBUG std::cout << "body len:" << bodylen << std::endl; #endif char* bodylc = new char[bodylen + 128]; // The extra 128 is used for various speed tricks to // squeeze as much speed as possible. char* bodynohtml; bodynohtml = NULL; // to avoid compiler warnings try { // the last thing we need is an exception causing a memory leak int i, j, bodynohtmllen; unsigned char c; // make a copy of the document lowercase char by char if (o.preserve_case == 1) { for (i = 0; i < bodylen; i++) { c = rawbody[i]; if (c == 13 || c == 9 || c == 10) { c = 32; // convert all whitespace to a space } bodylc[i] = c; } } else { for (i = 0; i < bodylen; i++) { c = rawbody[i]; if (c >= 'A' && c <= 'Z') { c = 'a' + c - 'A'; } else if ( c >= 192 && c <= 221) { // for accented chars c += 32; // 224 + c - 192 } else { if (c == 13 || c == 9 || c == 10) { c = 32; // convert all whitespace to a space } } bodylc[i] = c; } } if (o.hex_decode_content == 1) { // Mod suggested by // AFN Tue 8th April 2003 char* hexdecoded = new char[bodylen + 128 + 1]; unsigned char c1; unsigned char c2; unsigned char c3; char hexval[5] = "0x"; // Initializes a "hexadecimal string" hexval[4] = '\0'; char *ptr; // pointer required by strtol // make a copy of the escaped document char by char i = 0; j = 0; while (i < bodylen - 3) { // we lose 3 bytes but what the hell.. c1 = bodylc[i]; c2 = bodylc[i+1]; c3 = bodylc[i+2]; if ( c1 =='%' && (((c2 >= '0') && (c2 <= '9')) || ((c2 >= 'a') && (c2 <= 'f'))) && (((c3 >= '0') && (c3 <= '9')) || ((c3 >= 'a') && (c3 <= 'f'))) ) { hexval[2] = c2; hexval[3] = c3; c = (unsigned char)strtol(hexval, &ptr, 0); i += 3; } else { c = c1; i++; } hexdecoded[j] = c; j++; } if (bodylen > 3) { hexdecoded[bodylen-3] = bodylc[bodylen-3]; hexdecoded[bodylen-2] = bodylc[bodylen-2]; hexdecoded[bodylen-1] = bodylc[bodylen-1]; hexdecoded[bodylen] = '\0'; } delete[] bodylc; bodylc = hexdecoded; } if ((*o.fg[filtergroup]).enable_PICS == 1) { checkPICS(bodylc, bodylen); if (isItNaughty) { delete[] bodylc; return; // Well there is no point in continuing is there? } } if (o.phrase_filter_mode == 0 || o.phrase_filter_mode == 2) { checkphrase(bodylc, bodylen); // check raw if (isItNaughty || isException) { delete[] bodylc; return; // Well there is no point in continuing is there? } } if (o.phrase_filter_mode == 0) { delete[] bodylc; return; // only doing raw mode filtering } bodynohtml = new char[bodylen + 128 + 1]; // we need this extra byte * bool inhtml = false; // to flag if our pointer is within a html <> bool addit; // flag if we should copy this char to filtered version j = 1; bodynohtml[0] = 32; // * for this for(i = 0; i < bodylen; i++) { addit = true; c = bodylc[i]; if (c == '<') { inhtml = true; // flag we are inside a html <> } if (c == '>') { // flag we have just left a html <> inhtml = false; c = 32; } if (inhtml) { addit = false; } if (c == 32) { if (bodynohtml[j - 1] == 32) { // * and this addit = false; } } if (addit) { // if it passed the filters bodynohtml[j++] = c; // copy it to the filtered copy } } bodynohtmllen = j; checkphrase(bodynohtml, bodynohtmllen); } catch (exception& e) {} delete[] bodynohtml; delete[] bodylc;}void NaughtyFilter::checkphrase(char* file, int l) { std::string bannedphrase = ""; std::string weightedphrase = ""; std::string exceptionphrase = ""; int weighting = 0; int numfound; int i, j; std::deque<unsigned int> found = (*o.lm.l[(*o.fg[filtergroup]).banned_phrase_list]).graphSearch(file, l); numfound = found.size(); int type, index, weight; int combisize = (*o.lm.l[(*o.fg[filtergroup]).banned_phrase_list]).combilist.size(); bool allcmatched = true; bool isfound, wasbefore; std::string s1, s2; // look for combinations first //if banned must wait for exception later bool bannedcombifound = false; std::string combifound = ""; std::string combisofar = ""; for (i = 0; i < combisize; i++) { index = (*o.lm.l[(*o.fg[filtergroup]).banned_phrase_list]).combilist[i]; if (index == -2) { if (allcmatched) { type = (*o.lm.l[(*o.fg[filtergroup]).banned_phrase_list]).combilist[i + 1]; if (type == -1) { // combination exception isItNaughty = false; isException = true; whatIsNaughtyLog = o.language_list.getTranslation(605); // Combination exception phrase found: whatIsNaughtyLog += combisofar; whatIsNaughty = ""; return; } else if (type == 1) { // combination weighting weight = (*o.lm.l[(*o.fg[filtergroup]).banned_phrase_list]).combilist[i + 2]; weighting += weight; if (weightedphrase.length() > 0) { weightedphrase += "+"; } weightedphrase += "("; if (weight < 0) { weightedphrase += "-" + combisofar; } else { weightedphrase += combisofar; } weightedphrase += ")"; combisofar = ""; } else if (type == 0) { // combination banned bannedcombifound = true; combifound = combisofar; } i += 2; } else { allcmatched = true; i += 2; } } else { if (allcmatched) { isfound = false; s1 = (*o.lm.l[(*o.fg[filtergroup]).banned_phrase_list]).getItemAtInt(index); for (j = 0; j < numfound; j++) { if (s1 == (*o.lm.l[(*o.fg[filtergroup]).banned_phrase_list]).getItemAtInt(found[j])) { isfound = true; break; } } if (!isfound) { allcmatched = false; combisofar = ""; } else { if (combisofar.length() > 0) { combisofar += ", "; } combisofar += s1; } } } } for (i = 0; i < numfound; i++) { type = (*o.lm.l[(*o.fg[filtergroup]).banned_phrase_list]).getTypeAt(found[i]); // 0=banned, 1=weighted, -1=exception, 2=combi, 3=weightedcombi if (type == 0) { isItNaughty = true; bannedphrase = (*o.lm.l[(*o.fg[filtergroup]).banned_phrase_list]).getItemAtInt(found[i]); } else if (type == 1) { if (o.weighted_phrase_mode == 1) { weight = (*o.lm.l[(*o.fg[filtergroup]).banned_phrase_list]).getWeightAt(found[i]); weighting += weight; if (o.show_weighted_found == 1) { if (weightedphrase.length() > 0) { weightedphrase += "+"; } if (weight < 0) { weightedphrase += "-"; } weightedphrase += (*o.lm.l[(*o.fg[filtergroup]).banned_phrase_list]).getItemAtInt(found[i]); } #ifdef DGDEBUG cout << "found weighted phrase:" << (*o.lm.l[(*o.fg[filtergroup]).banned_phrase_list]).getItemAtInt(found[i]) << std::endl; #endif } else if ((*o.fg[filtergroup]).weighted_phrase_mode == 2) { wasbefore = false; for (j = 0; j < i; j++) { if (found[i] == found[j]) { wasbefore = true; break; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -