📄 lib.cpp
字号:
bFound=true; } g_free(casestr); } } if (bIsPureEnglish(sWord)) { // If not Found , try other status of sWord. int iWordLen=strlen(sWord); bool isupcase; gchar *sNewWord = (gchar *)g_malloc(iWordLen + 1); //cut one char "s" or "d" if(!bFound && iWordLen>1) { isupcase = sWord[iWordLen-1]=='S' || !strncmp(&sWord[iWordLen-2],"ED",2); if (isupcase || sWord[iWordLen-1]=='s' || !strncmp(&sWord[iWordLen-2],"ed",2)) { strcpy(sNewWord,sWord); sNewWord[iWordLen-1]='\0'; // cut "s" or "d" if (oLib[iLib]->Lookup(sNewWord, iIndex)) bFound=true; else if (isupcase || g_ascii_isupper(sWord[0])) { casestr = g_ascii_strdown(sNewWord, -1); if (strcmp(casestr, sNewWord)) { if(oLib[iLib]->Lookup(casestr, iIndex)) bFound=true; } g_free(casestr); } } } //cut "ly" if(!bFound && iWordLen>2) { isupcase = !strncmp(&sWord[iWordLen-2],"LY",2); if (isupcase || (!strncmp(&sWord[iWordLen-2],"ly",2))) { strcpy(sNewWord,sWord); sNewWord[iWordLen-2]='\0'; // cut "ly" if (iWordLen>5 && sNewWord[iWordLen-3]==sNewWord[iWordLen-4] && !bIsVowel(sNewWord[iWordLen-4]) && bIsVowel(sNewWord[iWordLen-5])) {//doubled sNewWord[iWordLen-3]='\0'; if( oLib[iLib]->Lookup(sNewWord, iIndex) ) bFound=true; else { if (isupcase || g_ascii_isupper(sWord[0])) { casestr = g_ascii_strdown(sNewWord, -1); if (strcmp(casestr, sNewWord)) { if(oLib[iLib]->Lookup(casestr, iIndex)) bFound=true; } g_free(casestr); } if (!bFound) sNewWord[iWordLen-3]=sNewWord[iWordLen-4]; //restore } } if (!bFound) { if (oLib[iLib]->Lookup(sNewWord, iIndex)) bFound=true; else if (isupcase || g_ascii_isupper(sWord[0])) { casestr = g_ascii_strdown(sNewWord, -1); if (strcmp(casestr, sNewWord)) { if(oLib[iLib]->Lookup(casestr, iIndex)) bFound=true; } g_free(casestr); } } } } //cut "ing" if(!bFound && iWordLen>3) { isupcase = !strncmp(&sWord[iWordLen-3],"ING",3); if (isupcase || !strncmp(&sWord[iWordLen-3],"ing",3) ) { strcpy(sNewWord,sWord); sNewWord[iWordLen-3]='\0'; if ( iWordLen>6 && (sNewWord[iWordLen-4]==sNewWord[iWordLen-5]) && !bIsVowel(sNewWord[iWordLen-5]) && bIsVowel(sNewWord[iWordLen-6])) { //doubled sNewWord[iWordLen-4]='\0'; if (oLib[iLib]->Lookup(sNewWord, iIndex)) bFound=true; else { if (isupcase || g_ascii_isupper(sWord[0])) { casestr = g_ascii_strdown(sNewWord, -1); if (strcmp(casestr, sNewWord)) { if(oLib[iLib]->Lookup(casestr, iIndex)) bFound=true; } g_free(casestr); } if (!bFound) sNewWord[iWordLen-4]=sNewWord[iWordLen-5]; //restore } } if( !bFound ) { if (oLib[iLib]->Lookup(sNewWord, iIndex)) bFound=true; else if (isupcase || g_ascii_isupper(sWord[0])) { casestr = g_ascii_strdown(sNewWord, -1); if (strcmp(casestr, sNewWord)) { if(oLib[iLib]->Lookup(casestr, iIndex)) bFound=true; } g_free(casestr); } } if(!bFound) { if (isupcase) strcat(sNewWord,"E"); // add a char "E" else strcat(sNewWord,"e"); // add a char "e" if(oLib[iLib]->Lookup(sNewWord, iIndex)) bFound=true; else if (isupcase || g_ascii_isupper(sWord[0])) { casestr = g_ascii_strdown(sNewWord, -1); if (strcmp(casestr, sNewWord)) { if(oLib[iLib]->Lookup(casestr, iIndex)) bFound=true; } g_free(casestr); } } } } //cut two char "es" if(!bFound && iWordLen>3) { isupcase = (!strncmp(&sWord[iWordLen-2],"ES",2) && (sWord[iWordLen-3] == 'S' || sWord[iWordLen-3] == 'X' || sWord[iWordLen-3] == 'O' || (iWordLen >4 && sWord[iWordLen-3] == 'H' && (sWord[iWordLen-4] == 'C' || sWord[iWordLen-4] == 'S')))); if (isupcase || (!strncmp(&sWord[iWordLen-2],"es",2) && (sWord[iWordLen-3] == 's' || sWord[iWordLen-3] == 'x' || sWord[iWordLen-3] == 'o' || (iWordLen >4 && sWord[iWordLen-3] == 'h' && (sWord[iWordLen-4] == 'c' || sWord[iWordLen-4] == 's'))))) { strcpy(sNewWord,sWord); sNewWord[iWordLen-2]='\0'; if(oLib[iLib]->Lookup(sNewWord, iIndex)) bFound=true; else if (isupcase || g_ascii_isupper(sWord[0])) { casestr = g_ascii_strdown(sNewWord, -1); if (strcmp(casestr, sNewWord)) { if(oLib[iLib]->Lookup(casestr, iIndex)) bFound=true; } g_free(casestr); } } } //cut "ed" if (!bFound && iWordLen>3) { isupcase = !strncmp(&sWord[iWordLen-2],"ED",2); if (isupcase || !strncmp(&sWord[iWordLen-2],"ed",2)) { strcpy(sNewWord,sWord); sNewWord[iWordLen-2]='\0'; if (iWordLen>5 && (sNewWord[iWordLen-3]==sNewWord[iWordLen-4]) && !bIsVowel(sNewWord[iWordLen-4]) && bIsVowel(sNewWord[iWordLen-5])) {//doubled sNewWord[iWordLen-3]='\0'; if (oLib[iLib]->Lookup(sNewWord, iIndex)) bFound=true; else { if (isupcase || g_ascii_isupper(sWord[0])) { casestr = g_ascii_strdown(sNewWord, -1); if (strcmp(casestr, sNewWord)) { if(oLib[iLib]->Lookup(casestr, iIndex)) bFound=true; } g_free(casestr); } if (!bFound) sNewWord[iWordLen-3]=sNewWord[iWordLen-4]; //restore } } if (!bFound) { if (oLib[iLib]->Lookup(sNewWord, iIndex)) bFound=true; else if (isupcase || g_ascii_isupper(sWord[0])) { casestr = g_ascii_strdown(sNewWord, -1); if (strcmp(casestr, sNewWord)) { if(oLib[iLib]->Lookup(casestr, iIndex)) bFound=true; } g_free(casestr); } } } } // cut "ied" , add "y". if (!bFound && iWordLen>3) { isupcase = !strncmp(&sWord[iWordLen-3],"IED",3); if (isupcase || (!strncmp(&sWord[iWordLen-3],"ied",3))) { strcpy(sNewWord,sWord); sNewWord[iWordLen-3]='\0'; if (isupcase) strcat(sNewWord,"Y"); // add a char "Y" else strcat(sNewWord,"y"); // add a char "y" if (oLib[iLib]->Lookup(sNewWord, iIndex)) bFound=true; else if (isupcase || g_ascii_isupper(sWord[0])) { casestr = g_ascii_strdown(sNewWord, -1); if (strcmp(casestr, sNewWord)) { if(oLib[iLib]->Lookup(casestr, iIndex)) bFound=true; } g_free(casestr); } } } // cut "ies" , add "y". if (!bFound && iWordLen>3) { isupcase = !strncmp(&sWord[iWordLen-3],"IES",3); if (isupcase || (!strncmp(&sWord[iWordLen-3],"ies",3))) { strcpy(sNewWord,sWord); sNewWord[iWordLen-3]='\0'; if (isupcase) strcat(sNewWord,"Y"); // add a char "Y" else strcat(sNewWord,"y"); // add a char "y" if(oLib[iLib]->Lookup(sNewWord, iIndex)) bFound=true; else if (isupcase || g_ascii_isupper(sWord[0])) { casestr = g_ascii_strdown(sNewWord, -1); if (strcmp(casestr, sNewWord)) { if(oLib[iLib]->Lookup(casestr, iIndex)) bFound=true; } g_free(casestr); } } } // cut "er". if (!bFound && iWordLen>2) { isupcase = !strncmp(&sWord[iWordLen-2],"ER",2); if (isupcase || (!strncmp(&sWord[iWordLen-2],"er",2))) { strcpy(sNewWord,sWord); sNewWord[iWordLen-2]='\0'; if(oLib[iLib]->Lookup(sNewWord, iIndex)) bFound=true; else if (isupcase || g_ascii_isupper(sWord[0])) { casestr = g_ascii_strdown(sNewWord, -1); if (strcmp(casestr, sNewWord)) { if(oLib[iLib]->Lookup(casestr, iIndex)) bFound=true; } g_free(casestr); } } } // cut "est". if (!bFound && iWordLen>3) { isupcase = !strncmp(&sWord[iWordLen-3], "EST", 3); if (isupcase || (!strncmp(&sWord[iWordLen-3],"est", 3))) { strcpy(sNewWord,sWord); sNewWord[iWordLen-3]='\0'; if(oLib[iLib]->Lookup(sNewWord, iIndex)) bFound=true; else if (isupcase || g_ascii_isupper(sWord[0])) { casestr = g_ascii_strdown(sNewWord, -1); if (strcmp(casestr, sNewWord)) { if(oLib[iLib]->Lookup(casestr, iIndex)) bFound=true; } g_free(casestr); } } } g_free(sNewWord); } if (bFound) iWordIndex = iIndex;#if 0 else { //don't change iWordIndex here. //when LookupSimilarWord all failed too, we want to use the old LookupWord index to list words. //iWordIndex = INVALID_INDEX; }#endif return bFound;}bool Libs::SimpleLookupWord(const gchar* sWord, glong & iWordIndex, int iLib){ bool bFound = oLib[iLib]->Lookup(sWord, iWordIndex); if (!bFound) bFound = LookupSimilarWord(sWord, iWordIndex, iLib); return bFound;}struct Fuzzystruct { char * pMatchWord; int iMatchWordDistance;};inline bool operator<(const Fuzzystruct & lh, const Fuzzystruct & rh) { if (lh.iMatchWordDistance!=rh.iMatchWordDistance) return lh.iMatchWordDistance<rh.iMatchWordDistance; if (lh.pMatchWord && rh.pMatchWord) return stardict_strcmp(lh.pMatchWord, rh.pMatchWord)<0; return false;}static inline void unicode_strdown(gunichar *str){ while (*str) { *str=g_unichar_tolower(*str); ++str; }}bool Libs::LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_size){ if (sWord[0] == '\0') return false; Fuzzystruct oFuzzystruct[reslist_size]; for (int i=0; i<reslist_size; i++) { oFuzzystruct[i].pMatchWord = NULL; oFuzzystruct[i].iMatchWordDistance = iMaxFuzzyDistance; } int iMaxDistance = iMaxFuzzyDistance; int iDistance; bool Found = false; EditDistance oEditDistance; glong iCheckWordLen; const char *sCheck; gunichar *ucs4_str1, *ucs4_str2; glong ucs4_str2_len; ucs4_str2 = g_utf8_to_ucs4_fast(sWord, -1, &ucs4_str2_len); unicode_strdown(ucs4_str2); for (std::vector<Dict *>::size_type iLib=0; iLib<oLib.size(); iLib++) { if (progress_func) progress_func(); //if (stardict_strcmp(sWord, poGetWord(0,iLib))>=0 && stardict_strcmp(sWord, poGetWord(narticles(iLib)-1,iLib))<=0) { //there are Chinese dicts and English dicts... if (TRUE) { const int iwords = narticles(iLib); for (int index=0; index<iwords; index++) { sCheck = poGetWord(index,iLib); // tolower and skip too long or too short words iCheckWordLen = g_utf8_strlen(sCheck, -1); if (iCheckWordLen-ucs4_str2_len>=iMaxDistance || ucs4_str2_len-iCheckWordLen>=iMaxDistance) continue; ucs4_str1 = g_utf8_to_ucs4_fast(sCheck, -1, NULL); if (iCheckWordLen > ucs4_str2_len) ucs4_str1[ucs4_str2_len]=0; unicode_strdown(ucs4_str1); iDistance = oEditDistance.CalEditDistance(ucs4_str1, ucs4_str2, iMaxDistance); g_free(ucs4_str1); if (iDistance<iMaxDistance && iDistance < ucs4_str2_len) { // when ucs4_str2_len=1,2 we need less fuzzy. Found = true; bool bAlreadyInList = false; int iMaxDistanceAt=0; for (int j=0; j<reslist_size; j++) { if (oFuzzystruct[j].pMatchWord && strcmp(oFuzzystruct[j].pMatchWord,sCheck)==0 ) {//already in list bAlreadyInList = true; break; } //find the position,it will certainly be found (include the first time) as iMaxDistance is set by last time. if (oFuzzystruct[j].iMatchWordDistance == iMaxDistance ) { iMaxDistanceAt = j; } } if (!bAlreadyInList) { if (oFuzzystruct[iMaxDistanceAt].pMatchWord) g_free(oFuzzystruct[iMaxDistanceAt].pMatchWord); oFuzzystruct[iMaxDistanceAt].pMatchWord = g_strdup(sCheck); oFuzzystruct[iMaxDistanceAt].iMatchWordDistance = iDistance; // calc new iMaxDistance iMaxDistance = iDistance; for (int j=0; j<reslist_size; j++) { if (oFuzzystruct[j].iMatchWordDistance > iMaxDistance) iMaxDistance = oFuzzystruct[j].iMatchWordDistance; } // calc new iMaxDistance } // add to list } // find one } // each word } // ok for search } // each lib g_free(ucs4_str2); if (Found)// sort with distance std::sort(oFuzzystruct, oFuzzystruct+reslist_size); for (gint i=0; i<reslist_size; ++i) reslist[i]=oFuzzystruct[i].pMatchWord; return Found;}inline bool less_for_compare(const char *lh, const char *rh) { return stardict_strcmp(lh, rh)<0;}gint Libs::LookupWithRule(const gchar *word, gchar **ppMatchWord){ glong aiIndex[MAX_MATCH_ITEM_PER_LIB+1]; gint iMatchCount = 0; GPatternSpec *pspec = g_pattern_spec_new(word); for (std::vector<Dict *>::size_type iLib=0; iLib<oLib.size(); iLib++) { //if(oLibs.LookdupWordsWithRule(pspec,aiIndex,MAX_MATCH_ITEM_PER_LIB+1-iMatchCount,iLib)) // -iMatchCount,so save time,but may got less result and the word may repeat. if (oLib[iLib]->LookupWithRule(pspec,aiIndex, MAX_MATCH_ITEM_PER_LIB+1)) { if (progress_func) progress_func(); for (int i=0; aiIndex[i]!=-1; i++) { const gchar * sMatchWord = poGetWord(aiIndex[i],iLib); bool bAlreadyInList = false; for (int j=0; j<iMatchCount; j++) { if (strcmp(ppMatchWord[j],sMatchWord)==0) {//already in list bAlreadyInList = true; break; } } if (!bAlreadyInList) ppMatchWord[iMatchCount++] = g_strdup(sMatchWord); } } } g_pattern_spec_free(pspec); if (iMatchCount)// sort it. std::sort(ppMatchWord, ppMatchWord+iMatchCount, less_for_compare); return iMatchCount;}bool Libs::LookupData(const gchar *sWord, std::vector<gchar *> *reslist){ std::vector<std::string> SearchWords; std::string SearchWord; const char *p=sWord; while (*p) { if (*p=='\\') { p++; switch (*p) { case ' ': SearchWord+=' '; break; case '\\': SearchWord+='\\'; break; case 't': SearchWord+='\t'; break; case 'n': SearchWord+='\n'; break; default: SearchWord+=*p; } } else if (*p == ' ') { if (!SearchWord.empty()) { SearchWords.push_back(SearchWord); SearchWord.clear(); } } else { SearchWord+=*p; } p++; } if (!SearchWord.empty()) { SearchWords.push_back(SearchWord); SearchWord.clear(); } if (SearchWords.empty()) return false; guint32 max_size =0; gchar *origin_data = NULL; for (std::vector<Dict *>::size_type i=0; i<oLib.size(); ++i) { if (!oLib[i]->containSearchData()) continue; if (progress_func) progress_func(); const gulong iwords = narticles(i); const gchar *key; guint32 offset, size; for (gulong j=0; j<iwords; ++j) { oLib[i]->get_key_and_data(j, &key, &offset, &size); if (size>max_size) { origin_data = (gchar *)g_realloc(origin_data, size); max_size = size; } if (oLib[i]->SearchData(SearchWords, offset, size, origin_data)) reslist[i].push_back(g_strdup(key)); } } g_free(origin_data); std::vector<Dict *>::size_type i; for (i=0; i<oLib.size(); ++i) if (!reslist[i].empty()) break; return i!=oLib.size();}/**************************************************/query_t analyze_query(const char *s, std::string& res){ if (!s || !*s) { res=""; return qtSIMPLE; } if (*s=='/') { res=s+1; return qtFUZZY; } if (*s=='|') { res=s+1; return qtDATA; } bool regexp=false; const char *p=s; res=""; for (; *p; res+=*p, ++p) { if (*p=='\\') { ++p; if (!*p) break; continue; } if (*p=='*' || *p=='?') regexp=true; } if (regexp) return qtREGEXP; return qtSIMPLE;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -