naughtyfilter.cpp

来自「一个UNIX/LINUX下的基于内容的过滤服务器源代码」· C++ 代码 · 共 603 行 · 第 1/2 页
CPP
603 行
//Please refer to http://dansguardian.org/?page=copyright2//for the license for this code.//Written by Daniel Barron (daniel@//jadeb/.com).//For support go to http://groups.yahoo.com/group/dansguardian//  This program is free software; you can redistribute it and/or modify//  it under the terms of the GNU General Public License as published by//  the Free Software Foundation; either version 2 of the License, or//  (at your option) any later version.////  This program is distributed in the hope that it will be useful,//  but WITHOUT ANY WARRANTY; without even the implied warranty of//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the//  GNU General Public License for more details.////  You should have received a copy of the GNU General Public License//  along with this program; if not, write to the Free Software//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA#include <syslog.h>#include <algorithm>#include "NaughtyFilter.hpp"#include "RegExp.hpp"#include "ListContainer.hpp"#include "DataBuffer.hpp"#include <deque>extern OptionContainer o;using namespace std;NaughtyFilter::NaughtyFilter()  // set up defaults:isItNaughty(false),isException(false),filtergroup(0),whatIsNaughty(""),whatIsNaughtyLog(""){}void NaughtyFilter::checkme(DataBuffer* body) {    int bodylen = (*body).buffer_length;    char* rawbody = (*body).data;    #ifdef DGDEBUG        std::cout << "body len:" << bodylen << std::endl;    #endif    char* bodylc = new char[bodylen + 128];                   // The extra 128 is used for various speed tricks to                   // squeeze as much speed as possible.    
char* bodynohtml;    bodynohtml = NULL;  // to avoid compiler warnings    try {  // the last thing we need is an exception causing a memory leak        int i, j, bodynohtmllen;        unsigned char c;        // make a copy of the document lowercase char by char        if (o.preserve_case == 1) {            for (i = 0; i < bodylen; i++) {                c = rawbody[i];                if (c == 13 || c == 9 || c == 10) {                    c = 32;  // convert all whitespace to a space                }                bodylc[i] = c;            }        }        else {            for (i = 0; i < bodylen; i++) {                c = rawbody[i];                if (c >= 'A' && c <= 'Z') {                    c = 'a' + c - 'A';                }                else if ( c >= 192 && c <= 221) { // for accented chars	            c += 32;   // 224 + c - 192                }                else {                    if (c == 13 || c == 9 || c == 10) {                        c = 32;  // convert all whitespace to a space                    }                }                bodylc[i] = c;            }        }        if (o.hex_decode_content == 1) { // Mod suggested by                                         // AFN Tue 8th April 2003            char* hexdecoded = new char[bodylen + 128 + 1];            unsigned char c1;            unsigned char c2;            unsigned char c3;            char hexval[5] = "0x"; // Initializes a "hexadecimal string"            hexval[4] = '\0';            char *ptr; // pointer required by strtol            // make a copy of the escaped document char by char            i = 0;            j = 0;            while (i < bodylen - 3) {  // we lose 3 bytes but what the hell..                
c1 = bodylc[i];                c2 = bodylc[i+1];                c3 = bodylc[i+2];                if ( c1 =='%' && (((c2 >= '0') && (c2 <= '9')) || ((c2 >= 'a') && (c2 <= 'f'))) && (((c3 >= '0') && (c3 <= '9')) || ((c3 >= 'a') && (c3 <= 'f'))) ) {	            hexval[2] = c2;                    hexval[3] = c3;	            c = (unsigned char)strtol(hexval, &ptr, 0);    	            i += 3;                }                else {                    c = c1;                    i++;                }                hexdecoded[j] = c;                j++;            }            if (bodylen > 3) {                hexdecoded[bodylen-3] = bodylc[bodylen-3];                hexdecoded[bodylen-2] = bodylc[bodylen-2];                hexdecoded[bodylen-1] = bodylc[bodylen-1];                hexdecoded[bodylen] = '\0';            }            delete[] bodylc;            bodylc = hexdecoded;        }        if ((*o.fg[filtergroup]).enable_PICS == 1) {            checkPICS(bodylc, bodylen);            if (isItNaughty) {                delete[] bodylc;                return;  // Well there is no point in continuing is there?            }        }        if (o.phrase_filter_mode == 0 || o.phrase_filter_mode == 2) {            checkphrase(bodylc, bodylen);  // check raw            if (isItNaughty || isException) {                delete[] bodylc;                return;  // Well there is no point in continuing is there?            
}        }        if (o.phrase_filter_mode == 0) {            delete[] bodylc;            return;  // only doing raw mode filtering        }        bodynohtml = new char[bodylen + 128 + 1];                                         // we need this extra byte *        bool inhtml = false;  // to flag if our pointer is within a html <>        bool addit;  // flag if we should copy this char to filtered version        j = 1;        bodynohtml[0] = 32;  // * for this        for(i = 0; i < bodylen; i++) {            addit = true;            c = bodylc[i];            if (c == '<') {                inhtml = true;  // flag we are inside a html <>            }            if (c == '>') {   // flag we have just left a html <>                inhtml = false;                c = 32;            }            if (inhtml) {                addit = false;            }            if (c == 32) {                if (bodynohtml[j - 1] == 32) {  // * and this                    addit = false;                }            }            if (addit) {  // if it passed the filters                bodynohtml[j++] = c;  // copy it to the filtered copy            }        }        bodynohtmllen = j;        checkphrase(bodynohtml, bodynohtmllen);    } catch (exception& e) {}    delete[] bodynohtml;    delete[] bodylc;}void NaughtyFilter::checkphrase(char* file, int l) {    std::string bannedphrase = "";    std::string weightedphrase = "";    std::string exceptionphrase = "";    int weighting = 0;    int numfound;    int i, j;    std::deque<unsigned int> found = (*o.lm.l[(*o.fg[filtergroup]).banned_phrase_list]).graphSearch(file, l);    numfound = found.size();    int type, index, weight;    int combisize = (*o.lm.l[(*o.fg[filtergroup]).banned_phrase_list]).combilist.size();    bool allcmatched = true;    bool isfound, wasbefore;    std::string s1, s2;    // look for combinations first    //if banned must wait for exception later    bool bannedcombifound = false;    std::string combifound = "";    
std::string combisofar = "";    for (i = 0; i < combisize; i++) {        index = (*o.lm.l[(*o.fg[filtergroup]).banned_phrase_list]).combilist[i];        if (index == -2) {            if (allcmatched) {                type = (*o.lm.l[(*o.fg[filtergroup]).banned_phrase_list]).combilist[i + 1];                if (type == -1) {  // combination exception                    isItNaughty = false;                    isException = true;                    whatIsNaughtyLog = o.language_list.getTranslation(605);                    // Combination exception phrase found:                    whatIsNaughtyLog += combisofar;                    whatIsNaughty = "";                    return;                }                else if (type == 1) {  // combination weighting                    weight = (*o.lm.l[(*o.fg[filtergroup]).banned_phrase_list]).combilist[i + 2];                    weighting += weight;                    if (weightedphrase.length() > 0) {                        weightedphrase += "+";                    }                    weightedphrase += "(";                    if (weight < 0) {                        weightedphrase += "-" + combisofar;                    }                    else {                        weightedphrase += combisofar;                    }                    weightedphrase += ")";                    combisofar = "";                }                else if (type == 0) {  // combination banned                    bannedcombifound = true;                    combifound = combisofar;                }                i += 2;            }            else {                allcmatched = true;                i += 2;            }        }        else {            if (allcmatched) {                isfound = false;                s1 = (*o.lm.l[(*o.fg[filtergroup]).banned_phrase_list]).getItemAtInt(index);                for (j = 0; j < numfound; j++) {                    if (s1 == (*o.lm.l[(*o.fg[filtergroup]).banned_phrase_list]).getItemAtInt(found[j])) {       
                 isfound = true;                        break;                    }                }                if (!isfound) {                    allcmatched = false;                    combisofar = "";                }                else {                    if (combisofar.length() > 0) {                        combisofar += ", ";                    }                    combisofar += s1;                }            }        }    }    for (i = 0; i < numfound; i++) {        type = (*o.lm.l[(*o.fg[filtergroup]).banned_phrase_list]).getTypeAt(found[i]);              // 0=banned, 1=weighted, -1=exception, 2=combi, 3=weightedcombi        if (type == 0) {            isItNaughty = true;            bannedphrase = (*o.lm.l[(*o.fg[filtergroup]).banned_phrase_list]).getItemAtInt(found[i]);        }        else if (type == 1) {            if (o.weighted_phrase_mode == 1) {                weight =  (*o.lm.l[(*o.fg[filtergroup]).banned_phrase_list]).getWeightAt(found[i]);                weighting += weight;                if (o.show_weighted_found == 1) {                    if (weightedphrase.length() > 0) {                        weightedphrase += "+";                    }                    if (weight < 0) {                        weightedphrase += "-";                    }                    weightedphrase += (*o.lm.l[(*o.fg[filtergroup]).banned_phrase_list]).getItemAtInt(found[i]);                }                #ifdef DGDEBUG                    cout << "found weighted phrase:" << (*o.lm.l[(*o.fg[filtergroup]).banned_phrase_list]).getItemAtInt(found[i]) << std::endl;                #endif            }            else if ((*o.fg[filtergroup]).weighted_phrase_mode == 2) {                wasbefore = false;                for (j = 0; j < i; j++) {                    if (found[i] == found[j]) {                        wasbefore = true;                        break;                    }
naughtyfilter.cpp - 源码说明

本页面展示了「一个UNIX/LINUX下的基于内容的过滤服务器源代码」中的 naughtyfilter.cpp 源码文件，采用 C++ 编程语言编写，共 603 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与LINUX相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码：Ctrl + C
搜索代码：Ctrl + F
全屏模式：F11
增大字号：Ctrl + =
减小字号：Ctrl + -
显示快捷键：?