📄 listcontainer.cpp
字号:
char* ListContainer::findStartsWith(char* string) { if (items > 0) { int r = searchRSW(0, items-1, string); if (r >= 0) { return (data + list[r]); } } char *rc; for(unsigned int i = 0; i < morelists.size(); i++) { rc = (*o.lm.l[morelists[i]]).findStartsWith(string); if (rc != NULL) { return rc; } } return NULL;}char* ListContainer::findStartsWithPartial(char* string) { if (items > 0) { int r = searchRSW(0, items-1, string); if (r >= 0) { return (data + list[r]); } if (r < -1) { r = 0 - r - 2; return (data + list[r]); // nearest match } } char *rc; for(unsigned int i = 0; i < morelists.size(); i++) { rc = (*o.lm.l[morelists[i]]).findStartsWithPartial(string); if (rc != NULL) { return rc; } } return NULL;}char* ListContainer::findEndsWith(char* string) { if (items > 0) { int r = searchREW(0, items-1, string); if (r >= 0) { return (data + list[r]); } } char *rc; for(unsigned int i = 0; i < morelists.size(); i++) { rc = (*o.lm.l[morelists[i]]).findEndsWith(string); if (rc != NULL) { return rc; } } return NULL;}std::string ListContainer::getItemAt(char *index) { std::string s = index; return s;}std::string ListContainer::getItemAtInt(int index) { char* o = data + list[index]; std::string s = o; return s;}int ListContainer::getWeightAt(unsigned int index) { return weight[index];}int ListContainer::getTypeAt(unsigned int index) { return itemtype[index];}void ListContainer::endsWithSort() { // sort by ending of line for(unsigned int i = 0; i < morelists.size(); i++) { (*o.lm.l[morelists[i]]).endsWithSort(); } if (items < 2 || issorted) return; quicksortEW(0, items - 1); isSW = false; issorted = true; return;}void ListContainer::startsWithSort() { // sort by start of line for(unsigned int i = 0; i < morelists.size(); i++) { (*o.lm.l[morelists[i]]).startsWithSort(); } if (items < 2 || issorted) return; quicksortSW(0, items - 1); isSW = true; issorted = true; return;}bool ListContainer::createCacheFile() { unsigned int i; for(i = 0; i < morelists.size(); i++) { (*o.lm.l[morelists[i]]).createCacheFile(); } if (isCacheFileNewer(sourcefile.toCharArray())) { // only do if it needs updating return true; } if (items < 1000) { // There is little to gain when there are so few return true; } String f = sourcefile; f += ".processed"; #ifdef DGDEBUG std::cout << "creating processed file:" << f << std::endl; #endif ofstream listfile(f.toCharArray(), ios::out); if (listfile.fail()) { if (!isDaemonised) { std::cerr << "Error creating cache file." << std::endl; std::cerr << "Do you have write access to this area:" << std::endl; std::cerr << f << std::endl; } syslog(LOG_ERR, "%s","Error cache file."); syslog(LOG_ERR, "%s","Do you have write access to this area:"); syslog(LOG_ERR, "%s",f.toCharArray()); return false; } for(i = 0; i < morelists.size(); i++) { f = ".Include<"; f += (*o.lm.l[morelists[i]]).sourcefile; f += ">\n"; listfile.write(f.toCharArray(), f.length()); } char* offset; for (i = 0; i < (unsigned)items; i++) { // write the entries in order offset = data + list[i]; listfile.write(offset, strlen(offset)); listfile.put('\n'); // newline per entry } listfile.close(); return true;}void ListContainer::makeGraph(int fqs) { force_quick_search = fqs; if (data_length == 0) return; int i; if (force_quick_search == 1) { for (i = 0; i < items; i++) { slowgraph.push_back(i); } return; } std::string s; std::string lasts; graphused = true; graphdata = new int[64 * data_length]; graphitems++; memset(graphdata, 0, sizeof(int) * 64 * data_length); std::deque<unsigned int> sizelist; for (i = 0; i < items; i++) { sizelist.push_back(i); } graphSizeSort(0, items - 1, &sizelist); for (i = 0; i < items; i++) { s = getItemAt(data + list[sizelist[i]]); graphAdd(s.c_str(), 0, sizelist[i]); } int ml = graphdata[2]; int branches; for (i = ml - 1; i >= 0; i--) { branches = graphFindBranches(graphdata[4 + i]); if (branches < 12) { // quicker to use B-M on node with few branches graphCopyNodePhrases(graphdata[4 + i]); //remove link to this node and so effectively remove all nodes // it links to but don't recover the memory as its not worth it for (int j = i; j < ml; j++) { graphdata[4 + j] = graphdata[4 + j + 1]; } graphdata[2]--; } }}void ListContainer::graphSizeSort(int l, int r, std::deque<unsigned int>* sizelist) { if (r <= l) return; unsigned int e; int k; unsigned int v = getItemAtInt((*sizelist)[r]).length(); int i = l-1, j = r, p = i, q = r; for (;;) { while (getItemAtInt((*sizelist)[++i]).length() < v); while (v < getItemAtInt((*sizelist)[--j]).length()) { if (j == l) break; } if (i >= j) break; e = (*sizelist)[i]; (*sizelist)[i] = (*sizelist)[j]; (*sizelist)[j] = e; if (v == getItemAtInt((*sizelist)[i]).length()) { p++; e = (*sizelist)[p]; (*sizelist)[p] = (*sizelist)[i]; (*sizelist)[i] = e; } if (v == getItemAtInt((*sizelist)[j]).length()) { q--; e = (*sizelist)[q]; (*sizelist)[q] = (*sizelist)[j]; (*sizelist)[j] = e; } } e = (*sizelist)[i]; (*sizelist)[i] = (*sizelist)[r]; (*sizelist)[r] = e; j = i - 1; i++; for (k = l; k <= p; k++, j--) { e = (*sizelist)[k]; (*sizelist)[k] = (*sizelist)[j]; (*sizelist)[j] = e; } for (k = r-1; k >= q; k--, i++) { e = (*sizelist)[k]; (*sizelist)[k] = (*sizelist)[i]; (*sizelist)[i] = e; } graphSizeSort(l, j, sizelist); graphSizeSort(i, r, sizelist);} // find the number of branches a node hasint ListContainer::graphFindBranches(unsigned int pos) { int branches = 0; int links = graphdata[pos * 64 + 2]; for (int i = 0; i < links; i++) { branches += graphFindBranches(graphdata[pos * 64 + 4 + i]); } if (links > 1) { branches += links - 1; } return branches;}void ListContainer::graphCopyNodePhrases(unsigned int pos) {// copy into slowgraph deque all different phrases from a root link int links = graphdata[pos * 64 + 2]; int i; for (i = 0; i < links; i++) { graphCopyNodePhrases(graphdata[pos * 64 + 4 + i]); } bool found = false; int size = slowgraph.size(); unsigned int phrasenumber = graphdata[pos * 64 + 3]; for (i = 0; i < size; i++) { if (slowgraph[i] == phrasenumber) { found = true; break; } } if (!found) { slowgraph.push_back(phrasenumber); }}int ListContainer::bmsearch(char* file, int fl, std::string s) { // must match all int j, l; // counters int p; // to hold precalcuated value for speed bool match; // flag int qsBc[256]; // Quick Search Boyer Moore shift table (256 alphabet) char* k; // pointer used in matching int count = 0; int pl = s.length(); char* phrase = new char[pl + 1]; for (j = 0; j < pl; j++) { phrase[j] = s[j]; } phrase[pl] = 0; if (fl < pl) return 0; // reality checking if (pl > 126) return 0; // reality checking // For speed we append the phrase to the end of the memory block so it // is always found, thus eliminating some checking. This is possible as // we know an extra 127 bytes have been provided by NaughtyFilter.cpp // and also the OptionContainer does not allow phrase lengths greater // than 126 chars k = file + fl; for(j = 0; j < pl; j++) { k[j] = s[j]; } // Next we need to make the Quick Search Boyer Moore shift table p = pl + 1; for (j = 0; j < 256; j++) { // Preprocessing qsBc[j] = p; } for (j = 0; j < pl; j++) { // Preprocessing qsBc[(unsigned char)phrase[j]] = pl - j; } // Now do the searching! for(j = 0;;) { k = file + j; match = true; for (l = 0; l < pl; l++) { // quiv, but faster, memcmp() if (k[l] != phrase[l]) { match = false; break; } } if (match) { if (j >= fl) { break; // is the end of file marker } count++; } j += qsBc[(unsigned char)file[j + pl]]; // shift } delete[] phrase; return count;}std::deque<unsigned int> ListContainer::graphSearch(char* doc, int len) { std::deque<unsigned int> result; int i, j, k; int sl; int ppos; sl = slowgraph.size(); for (i = 0; i < sl; i++) { ppos = slowgraph[i]; j = bmsearch(doc, len, getItemAtInt(ppos)); for (k = 0; k < j; k++) { result.push_back(ppos); } } if (force_quick_search == 1 || graphitems==0) { return result; } int ml; int* stack = new int[1024]; int stacksize = 0; unsigned char p; int pos; int depth; ml = graphdata[2] + 4; for (i = 0; i < len; i++) { for (j = 4; j < ml; j++) { pos = graphdata[j]; for(depth = 0;;) { ppos = pos << 6; p = graphdata[ppos]; if (p == doc[i + depth]) { if (graphdata[ppos + 1] == 1) { result.push_back(graphdata[ppos + 3]); } sl = graphdata[ppos + 2]; if (sl > 0) { depth++; for (k = 1; k < sl; k++) { stack[stacksize++] = graphdata[ppos + 4 + k]; stack[stacksize++] = depth; } pos = graphdata[ppos + 4]; continue; } } if (stacksize > 0) { depth = stack[--stacksize]; pos = stack[--stacksize]; continue; } break; } } } delete[] stack; return result;}// Format of the data is each entry has 64 int values with format of:// [letter][last letter flag][num links][from phrase][link0][link1]...void ListContainer::graphAdd(String s, int inx, int item) { unsigned char p = s.charAt(0); unsigned char c; bool found = false; String t; int i, px, it; int numlinks; for (i = 0; i < graphdata[inx * 64 + 2]; i++) { c = (unsigned char)graphdata[(graphdata[inx * 64 + 4 + i]) * 64]; if (p == c) { t = s; t.lop(); if (t.length() > 0) { graphAdd(t, graphdata[inx * 64 + 4 + i], item); return; } found = true; // this means the phrase is already there // as part of an existing phrase px = graphdata[(graphdata[inx * 64 + 4 + i]) * 64 + 1]; if (px == 1) { // the exact phrase is already there px = graphdata[(graphdata[inx * 64 + 4 + i]) * 64 + 3]; it = itemtype[px]; if ((it > 9 && itemtype[item] < 10) || itemtype[item] == -1) { // exists as a combi entry already // if got here existing entry must be a combi AND // new entry is not a combi so we overwrite the // existing values as combi values and types are // stored in the combilist // OR // its a an exception // exception phrases take presidence itemtype[px] = itemtype[item];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -