📄 listcontainer.cpp

📁 一个UNIX/LINUX下的基于内容的过滤服务器源代码
💻 CPP
📖 第 1 页 / 共 3 页
字号:
char* ListContainer::findStartsWith(char* string) {    if (items > 0) {        int r = searchRSW(0, items-1, string);        if (r >= 0) {            return (data + list[r]);        }    }    char *rc;    for(unsigned int i = 0; i < morelists.size(); i++) {        rc = (*o.lm.l[morelists[i]]).findStartsWith(string);        if (rc != NULL) {            return rc;        }    }    return NULL;}char* ListContainer::findStartsWithPartial(char* string) {    if (items > 0) {        int r = searchRSW(0, items-1, string);        if (r >= 0) {            return (data + list[r]);        }        if (r < -1) {            r = 0 - r - 2;            return (data + list[r]); // nearest match        }    }    char *rc;    for(unsigned int i = 0; i < morelists.size(); i++) {        rc = (*o.lm.l[morelists[i]]).findStartsWithPartial(string);        if (rc != NULL) {            return rc;        }    }    return NULL;}char* ListContainer::findEndsWith(char* string) {    if (items > 0) {        int r = searchREW(0, items-1, string);        if (r >= 0) {            return (data + list[r]);        }    }    char *rc;    for(unsigned int i = 0; i < morelists.size(); i++) {        rc = (*o.lm.l[morelists[i]]).findEndsWith(string);        if (rc != NULL) {            return rc;        }    }    return NULL;}std::string ListContainer::getItemAt(char *index) {    std::string s = index;    return s;}std::string ListContainer::getItemAtInt(int index) {    char* o = data + list[index];    std::string s = o;    return s;}int ListContainer::getWeightAt(unsigned int index) {    return weight[index];}int ListContainer::getTypeAt(unsigned int index) {    return itemtype[index];}void ListContainer::endsWithSort() {  // sort by ending of line    for(unsigned int i = 0; i < morelists.size(); i++) {        (*o.lm.l[morelists[i]]).endsWithSort();    }    if (items < 2 || issorted) return;    quicksortEW(0, items - 1);    isSW = false;    issorted = true;    return;}void ListContainer::startsWithSort() {  // sort by start of line    for(unsigned int i = 0; i < morelists.size(); i++) {        (*o.lm.l[morelists[i]]).startsWithSort();    }    if (items < 2 || issorted) return;    quicksortSW(0, items - 1);    isSW = true;    issorted = true;    return;}bool ListContainer::createCacheFile() {    unsigned int i;    for(i = 0; i < morelists.size(); i++) {        (*o.lm.l[morelists[i]]).createCacheFile();    }    if (isCacheFileNewer(sourcefile.toCharArray())) {  // only do if it needs updating        return true;    }    if (items < 1000) {  // There is little to gain when there are so few        return true;    }    String f = sourcefile;    f += ".processed";    #ifdef DGDEBUG        std::cout << "creating processed file:" << f << std::endl;    #endif    ofstream listfile(f.toCharArray(), ios::out);    if (listfile.fail()) {        if (!isDaemonised) {            std::cerr << "Error creating cache file." << std::endl;            std::cerr << "Do you have write access to this area:" << std::endl;            std::cerr << f << std::endl;        }        syslog(LOG_ERR, "%s","Error cache file.");        syslog(LOG_ERR, "%s","Do you have write access to this area:");        syslog(LOG_ERR, "%s",f.toCharArray());        return false;    }    for(i = 0; i < morelists.size(); i++) {        f = ".Include<";        f += (*o.lm.l[morelists[i]]).sourcefile;        f += ">\n";        listfile.write(f.toCharArray(), f.length());    }    char* offset;    for (i = 0; i < (unsigned)items; i++) {  // write the entries in order        offset = data + list[i];        listfile.write(offset, strlen(offset));        listfile.put('\n');  // newline per entry    }    listfile.close();    return true;}void ListContainer::makeGraph(int fqs) {    force_quick_search = fqs;    if (data_length == 0) return;    int i;    if (force_quick_search == 1) {        for (i = 0; i < items; i++) {            slowgraph.push_back(i);        }        return;    }    std::string s;    std::string lasts;    graphused = true;    graphdata = new int[64 * data_length];    graphitems++;    memset(graphdata, 0, sizeof(int) * 64 * data_length);    std::deque<unsigned int> sizelist;    for (i = 0; i < items; i++) {        sizelist.push_back(i);    }    graphSizeSort(0, items - 1, &sizelist);    for (i = 0; i < items; i++) {        s = getItemAt(data + list[sizelist[i]]);        graphAdd(s.c_str(), 0, sizelist[i]);    }    int ml = graphdata[2];    int branches;    for (i = ml - 1; i >= 0; i--) {        branches = graphFindBranches(graphdata[4 + i]);        if (branches < 12) {  // quicker to use B-M on node with few branches            graphCopyNodePhrases(graphdata[4 + i]);            //remove link to this node and so effectively remove all nodes            // it links to but don't recover the memory as its not worth it            for (int j = i; j < ml; j++) {                graphdata[4 + j] = graphdata[4 + j + 1];            }            graphdata[2]--;        }    }}void ListContainer::graphSizeSort(int l, int r, std::deque<unsigned int>* sizelist) {    if (r <= l) return;    unsigned int e;    int k;    unsigned int v = getItemAtInt((*sizelist)[r]).length();    int i = l-1, j = r, p = i, q = r;    for (;;) {        while (getItemAtInt((*sizelist)[++i]).length() < v);        while (v < getItemAtInt((*sizelist)[--j]).length()) {            if (j == l) break;        }        if (i >= j) break;        e = (*sizelist)[i]; (*sizelist)[i] = (*sizelist)[j]; (*sizelist)[j] = e;        if (v == getItemAtInt((*sizelist)[i]).length()) {            p++;            e = (*sizelist)[p]; (*sizelist)[p] = (*sizelist)[i]; (*sizelist)[i] = e;        }        if (v == getItemAtInt((*sizelist)[j]).length()) {            q--;            e = (*sizelist)[q]; (*sizelist)[q] = (*sizelist)[j]; (*sizelist)[j] = e;        }    }    e = (*sizelist)[i]; (*sizelist)[i] = (*sizelist)[r]; (*sizelist)[r] = e;    j = i - 1;    i++;    for (k = l; k <= p; k++, j--) {        e = (*sizelist)[k]; (*sizelist)[k] = (*sizelist)[j]; (*sizelist)[j] = e;    }    for (k = r-1; k >= q; k--, i++) {        e = (*sizelist)[k]; (*sizelist)[k] = (*sizelist)[i]; (*sizelist)[i] = e;    }    graphSizeSort(l, j, sizelist);    graphSizeSort(i, r, sizelist);}  // find the number of branches a node hasint ListContainer::graphFindBranches(unsigned int pos) {    int branches = 0;    int links = graphdata[pos * 64 + 2];    for (int i = 0; i < links; i++) {        branches += graphFindBranches(graphdata[pos * 64 + 4 + i]);    }    if (links > 1) {        branches += links - 1;    }    return branches;}void ListContainer::graphCopyNodePhrases(unsigned int pos) {// copy into slowgraph deque all different phrases from a root link    int links = graphdata[pos * 64 + 2];    int i;    for (i = 0; i < links; i++) {        graphCopyNodePhrases(graphdata[pos * 64 + 4 + i]);    }    bool found = false;    int size = slowgraph.size();    unsigned int phrasenumber = graphdata[pos * 64 + 3];    for (i = 0; i < size; i++) {        if (slowgraph[i] == phrasenumber) {            found = true;            break;        }    }    if (!found) {        slowgraph.push_back(phrasenumber);    }}int ListContainer::bmsearch(char* file, int fl, std::string s) {    // must match all    int j, l;  // counters    int p;  // to hold precalcuated value for speed    bool match;  // flag    int qsBc[256];  // Quick Search Boyer Moore shift table (256 alphabet)    char* k;  // pointer used in matching    int count = 0;    int pl = s.length();    char* phrase = new char[pl + 1];    for (j = 0; j < pl; j++) {        phrase[j] = s[j];    }    phrase[pl] = 0;    if (fl < pl) return 0;  // reality checking    if (pl > 126) return 0;  // reality checking    // For speed we append the phrase to the end of the memory block so it    // is always found, thus eliminating some checking.  This is possible as    // we know an extra 127 bytes have been provided by NaughtyFilter.cpp    // and also the OptionContainer does not allow phrase lengths greater    // than 126 chars    k = file + fl;    for(j = 0; j < pl; j++) {        k[j] = s[j];    }    // Next we need to make the Quick Search Boyer Moore shift table    p = pl + 1;    for (j = 0; j < 256; j++) {  // Preprocessing        qsBc[j] = p;    }    for (j = 0; j < pl; j++) {  // Preprocessing        qsBc[(unsigned char)phrase[j]] = pl - j;    }    // Now do the searching!    for(j = 0;;) {        k = file + j;        match = true;        for (l = 0; l < pl; l++) {  // quiv, but faster, memcmp()            if (k[l] != phrase[l]) {                match = false;                break;            }        }        if (match) {            if (j >= fl) {                break;  // is the end of file marker            }            count++;        }        j += qsBc[(unsigned char)file[j + pl]];  // shift    }    delete[] phrase;    return count;}std::deque<unsigned int> ListContainer::graphSearch(char* doc, int len) {    std::deque<unsigned int> result;    int i, j, k;    int sl;    int ppos;    sl = slowgraph.size();    for (i = 0; i < sl; i++) {        ppos = slowgraph[i];        j = bmsearch(doc, len, getItemAtInt(ppos));        for (k = 0; k < j; k++) {            result.push_back(ppos);        }    }    if (force_quick_search == 1 || graphitems==0) {        return result;    }    int ml;    int* stack = new int[1024];    int stacksize = 0;    unsigned char p;    int pos;    int depth;    ml = graphdata[2] + 4;    for (i = 0; i < len; i++) {        for (j = 4; j < ml; j++) {            pos = graphdata[j];            for(depth = 0;;) {                ppos = pos << 6;                p = graphdata[ppos];                if (p == doc[i + depth]) {                    if (graphdata[ppos + 1] == 1) {                        result.push_back(graphdata[ppos + 3]);                    }                    sl = graphdata[ppos + 2];                    if (sl > 0) {                        depth++;                        for (k = 1; k < sl; k++) {                            stack[stacksize++] = graphdata[ppos + 4 + k];                            stack[stacksize++] = depth;                        }                        pos = graphdata[ppos + 4];                        continue;                    }                }                if (stacksize > 0) {                    depth = stack[--stacksize];                    pos = stack[--stacksize];                    continue;                }                break;            }        }    }    delete[] stack;    return result;}// Format of the data is each entry has 64 int values with format of:// [letter][last letter flag][num links][from phrase][link0][link1]...void ListContainer::graphAdd(String s, int inx, int item) {    unsigned char p = s.charAt(0);    unsigned char c;    bool found = false;    String t;    int i, px, it;    int numlinks;    for (i = 0; i < graphdata[inx * 64 + 2]; i++) {        c = (unsigned char)graphdata[(graphdata[inx * 64 +  4 + i]) * 64];        if (p == c) {            t = s;            t.lop();            if (t.length() > 0) {                graphAdd(t, graphdata[inx * 64 + 4 + i], item);                return;            }            found = true;          // this means the phrase is already there          // as part of an existing phrase            px = graphdata[(graphdata[inx * 64 +  4 + i]) * 64 + 1];            if (px == 1) {  // the exact phrase is already there                px = graphdata[(graphdata[inx * 64 +  4 + i]) * 64 + 3];                it = itemtype[px];                if ((it > 9 && itemtype[item] < 10) || itemtype[item] == -1) {                    // exists as a combi entry already                    // if got here existing entry must be a combi AND                    // new entry is not a combi so we overwrite the                    // existing values as combi values and types are                    // stored in the combilist                    // OR                    // its a an exception                    // exception phrases take presidence                    itemtype[px] = itemtype[item];
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -