📄 lib.cpp

📁 使用Qt4编写的星际译王（stardict）
💻 CPP
📖 第 1 页 / 共 3 页
字号:
上一页 1 2 3
					bFound=true;			}			g_free(casestr);		}		}	  if (bIsPureEnglish(sWord)) {				// If not Found , try other status of sWord.		int iWordLen=strlen(sWord);    bool isupcase;				gchar *sNewWord = (gchar *)g_malloc(iWordLen + 1);		//cut one char "s" or "d"		if(!bFound && iWordLen>1) {      isupcase = sWord[iWordLen-1]=='S' || !strncmp(&sWord[iWordLen-2],"ED",2);      if (isupcase || sWord[iWordLen-1]=='s' || !strncmp(&sWord[iWordLen-2],"ed",2)) {				strcpy(sNewWord,sWord);				sNewWord[iWordLen-1]='\0'; // cut "s" or "d"				if (oLib[iLib]->Lookup(sNewWord, iIndex))					bFound=true;				else if (isupcase || g_ascii_isupper(sWord[0])) {					casestr = g_ascii_strdown(sNewWord, -1);					if (strcmp(casestr, sNewWord)) {						if(oLib[iLib]->Lookup(casestr, iIndex))							bFound=true;					}					g_free(casestr);				}			}		}				//cut "ly"		if(!bFound && iWordLen>2) {			isupcase = !strncmp(&sWord[iWordLen-2],"LY",2);			if (isupcase || (!strncmp(&sWord[iWordLen-2],"ly",2))) {				strcpy(sNewWord,sWord);				sNewWord[iWordLen-2]='\0';  // cut "ly"				if (iWordLen>5 && sNewWord[iWordLen-3]==sNewWord[iWordLen-4]						&& !bIsVowel(sNewWord[iWordLen-4]) && 						bIsVowel(sNewWord[iWordLen-5])) {//doubled		      					sNewWord[iWordLen-3]='\0';					if( oLib[iLib]->Lookup(sNewWord, iIndex) )						bFound=true;					else {						if (isupcase || g_ascii_isupper(sWord[0])) {							casestr = g_ascii_strdown(sNewWord, -1);							if (strcmp(casestr, sNewWord)) {								if(oLib[iLib]->Lookup(casestr, iIndex))									bFound=true;							}							g_free(casestr);						}						if (!bFound)							sNewWord[iWordLen-3]=sNewWord[iWordLen-4];  //restore					}					                    					}				if (!bFound) {					if (oLib[iLib]->Lookup(sNewWord, iIndex))						bFound=true;					else if (isupcase || g_ascii_isupper(sWord[0])) {						casestr = g_ascii_strdown(sNewWord, -1);						if (strcmp(casestr, sNewWord)) {							if(oLib[iLib]->Lookup(casestr, iIndex))								bFound=true;						}						g_free(casestr);					}				}		
	}		}				//cut "ing"		if(!bFound && iWordLen>3) {			isupcase = !strncmp(&sWord[iWordLen-3],"ING",3);			if (isupcase || !strncmp(&sWord[iWordLen-3],"ing",3) ) {				strcpy(sNewWord,sWord);				sNewWord[iWordLen-3]='\0';				if ( iWordLen>6 && (sNewWord[iWordLen-4]==sNewWord[iWordLen-5])						 && !bIsVowel(sNewWord[iWordLen-5]) && 						 bIsVowel(sNewWord[iWordLen-6])) {  //doubled	  					sNewWord[iWordLen-4]='\0';					if (oLib[iLib]->Lookup(sNewWord, iIndex))						bFound=true;					else {						if (isupcase || g_ascii_isupper(sWord[0])) {							casestr = g_ascii_strdown(sNewWord, -1);							if (strcmp(casestr, sNewWord)) {								if(oLib[iLib]->Lookup(casestr, iIndex))									bFound=true;							}							g_free(casestr);						}						if (!bFound)							sNewWord[iWordLen-4]=sNewWord[iWordLen-5];  //restore					}				}				if( !bFound ) {					if (oLib[iLib]->Lookup(sNewWord, iIndex))						bFound=true;					else if (isupcase || g_ascii_isupper(sWord[0])) {						casestr = g_ascii_strdown(sNewWord, -1);						if (strcmp(casestr, sNewWord)) {							if(oLib[iLib]->Lookup(casestr, iIndex))								bFound=true;						}						g_free(casestr);					}										}				if(!bFound) {					if (isupcase)						strcat(sNewWord,"E"); // add a char "E"					else						strcat(sNewWord,"e"); // add a char "e"					if(oLib[iLib]->Lookup(sNewWord, iIndex))						bFound=true;					else if (isupcase || g_ascii_isupper(sWord[0])) {						casestr = g_ascii_strdown(sNewWord, -1);						if (strcmp(casestr, sNewWord)) {							if(oLib[iLib]->Lookup(casestr, iIndex))								bFound=true;						}						g_free(casestr);					}										}			}		}				//cut two char "es"		if(!bFound && iWordLen>3) {			isupcase = (!strncmp(&sWord[iWordLen-2],"ES",2) && 									(sWord[iWordLen-3] == 'S' || 									 sWord[iWordLen-3] == 'X' || 									 sWord[iWordLen-3] == 'O' || 									 (iWordLen >4 && sWord[iWordLen-3] == 'H' && 										(sWord[iWordLen-4] == 'C' || 										 sWord[iWordLen-4] == 'S'))));			if (isupcase || 					
(!strncmp(&sWord[iWordLen-2],"es",2) && 	   (sWord[iWordLen-3] == 's' || sWord[iWordLen-3] == 'x' || 	    sWord[iWordLen-3] == 'o' || 	    (iWordLen >4 && sWord[iWordLen-3] == 'h' && 	     (sWord[iWordLen-4] == 'c' || sWord[iWordLen-4] == 's'))))) {				strcpy(sNewWord,sWord);				sNewWord[iWordLen-2]='\0';				if(oLib[iLib]->Lookup(sNewWord, iIndex))					bFound=true;				else if (isupcase || g_ascii_isupper(sWord[0])) {					casestr = g_ascii_strdown(sNewWord, -1);					if (strcmp(casestr, sNewWord)) {						if(oLib[iLib]->Lookup(casestr, iIndex))							bFound=true;					}					g_free(casestr);				}			}		}				//cut "ed"    if (!bFound && iWordLen>3) {			isupcase = !strncmp(&sWord[iWordLen-2],"ED",2);      if (isupcase || !strncmp(&sWord[iWordLen-2],"ed",2)) {				strcpy(sNewWord,sWord);				sNewWord[iWordLen-2]='\0';				if (iWordLen>5 && (sNewWord[iWordLen-3]==sNewWord[iWordLen-4])						&& !bIsVowel(sNewWord[iWordLen-4]) && 						bIsVowel(sNewWord[iWordLen-5])) {//doubled	            					sNewWord[iWordLen-3]='\0';					if (oLib[iLib]->Lookup(sNewWord, iIndex))						bFound=true;					else {						if (isupcase || g_ascii_isupper(sWord[0])) {							casestr = g_ascii_strdown(sNewWord, -1);							if (strcmp(casestr, sNewWord)) {								if(oLib[iLib]->Lookup(casestr, iIndex))									bFound=true;							}							g_free(casestr);						}						if (!bFound)							sNewWord[iWordLen-3]=sNewWord[iWordLen-4];  //restore					}				}				if (!bFound) {					if (oLib[iLib]->Lookup(sNewWord, iIndex))						bFound=true;					else if (isupcase || g_ascii_isupper(sWord[0])) {						casestr = g_ascii_strdown(sNewWord, -1);						if (strcmp(casestr, sNewWord)) {							if(oLib[iLib]->Lookup(casestr, iIndex))								bFound=true;						}						g_free(casestr);					}				}			}		}				// cut "ied" , add "y".    
if (!bFound && iWordLen>3) {			isupcase = !strncmp(&sWord[iWordLen-3],"IED",3);      if (isupcase || (!strncmp(&sWord[iWordLen-3],"ied",3))) {				strcpy(sNewWord,sWord);				sNewWord[iWordLen-3]='\0';				if (isupcase)					strcat(sNewWord,"Y"); // add a char "Y"				else					strcat(sNewWord,"y"); // add a char "y"				if (oLib[iLib]->Lookup(sNewWord, iIndex))					bFound=true;				else if (isupcase || g_ascii_isupper(sWord[0])) {					casestr = g_ascii_strdown(sNewWord, -1);					if (strcmp(casestr, sNewWord)) {						if(oLib[iLib]->Lookup(casestr, iIndex))							bFound=true;					}					g_free(casestr);				}			}		}    		// cut "ies" , add "y".    if (!bFound && iWordLen>3) {			isupcase = !strncmp(&sWord[iWordLen-3],"IES",3);      if (isupcase || (!strncmp(&sWord[iWordLen-3],"ies",3))) {				strcpy(sNewWord,sWord);				sNewWord[iWordLen-3]='\0';				if (isupcase)					strcat(sNewWord,"Y"); // add a char "Y"				else					strcat(sNewWord,"y"); // add a char "y"				if(oLib[iLib]->Lookup(sNewWord, iIndex))					bFound=true;				else if (isupcase || g_ascii_isupper(sWord[0])) {					casestr = g_ascii_strdown(sNewWord, -1);					if (strcmp(casestr, sNewWord)) {						if(oLib[iLib]->Lookup(casestr, iIndex))							bFound=true;					}					g_free(casestr);				}			}		}		// cut "er".    if (!bFound && iWordLen>2) {			isupcase = !strncmp(&sWord[iWordLen-2],"ER",2);      if (isupcase || (!strncmp(&sWord[iWordLen-2],"er",2))) {				strcpy(sNewWord,sWord);				sNewWord[iWordLen-2]='\0';				if(oLib[iLib]->Lookup(sNewWord, iIndex))					bFound=true;				else if (isupcase || g_ascii_isupper(sWord[0])) {					casestr = g_ascii_strdown(sNewWord, -1);					if (strcmp(casestr, sNewWord)) {						if(oLib[iLib]->Lookup(casestr, iIndex))							bFound=true;					}					g_free(casestr);				}			}		}				// cut "est".    
if (!bFound && iWordLen>3) {			isupcase = !strncmp(&sWord[iWordLen-3], "EST", 3);      if (isupcase || (!strncmp(&sWord[iWordLen-3],"est", 3))) {				strcpy(sNewWord,sWord);				sNewWord[iWordLen-3]='\0';				if(oLib[iLib]->Lookup(sNewWord, iIndex))					bFound=true;				else if (isupcase || g_ascii_isupper(sWord[0])) {					casestr = g_ascii_strdown(sNewWord, -1);					if (strcmp(casestr, sNewWord)) {						if(oLib[iLib]->Lookup(casestr, iIndex))							bFound=true;					}					g_free(casestr);				}			}		}				g_free(sNewWord);	}			if (bFound)		iWordIndex = iIndex;#if 0	else {		//don't change iWordIndex here.		//when LookupSimilarWord all failed too, we want to use the old LookupWord index to list words.		//iWordIndex = INVALID_INDEX;	}#endif  return bFound;}bool Libs::SimpleLookupWord(const gchar* sWord, glong & iWordIndex, int iLib){  bool bFound = oLib[iLib]->Lookup(sWord, iWordIndex);	if (!bFound)		bFound = LookupSimilarWord(sWord, iWordIndex, iLib);	return bFound;}struct Fuzzystruct {	char * pMatchWord;	int iMatchWordDistance;};inline bool operator<(const Fuzzystruct & lh, const Fuzzystruct & rh) {  if (lh.iMatchWordDistance!=rh.iMatchWordDistance)    return lh.iMatchWordDistance<rh.iMatchWordDistance;  if (lh.pMatchWord && rh.pMatchWord)    return stardict_strcmp(lh.pMatchWord, rh.pMatchWord)<0;    return false;}static inline void unicode_strdown(gunichar *str){	while (*str) {		*str=g_unichar_tolower(*str);		++str;	}}bool Libs::LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_size){  if (sWord[0] == '\0')    return false;	Fuzzystruct oFuzzystruct[reslist_size];         for (int i=0; i<reslist_size; i++) {    oFuzzystruct[i].pMatchWord = NULL;    oFuzzystruct[i].iMatchWordDistance = iMaxFuzzyDistance;  }  int iMaxDistance = iMaxFuzzyDistance;  int iDistance;  bool Found = false;  EditDistance oEditDistance;  glong iCheckWordLen;  const char *sCheck;  gunichar *ucs4_str1, *ucs4_str2;  glong ucs4_str2_len;  ucs4_str2 = g_utf8_to_ucs4_fast(sWord, -1, 
&ucs4_str2_len);	unicode_strdown(ucs4_str2);  for (std::vector<Dict *>::size_type iLib=0; iLib<oLib.size(); iLib++) {    if (progress_func)      progress_func();    //if (stardict_strcmp(sWord, poGetWord(0,iLib))>=0 && stardict_strcmp(sWord, poGetWord(narticles(iLib)-1,iLib))<=0) {      //there are Chinese dicts and English dicts...    if (TRUE) {      const int iwords = narticles(iLib);      for (int index=0; index<iwords; index++) {				sCheck = poGetWord(index,iLib);				// tolower and skip too long or too short words				iCheckWordLen = g_utf8_strlen(sCheck, -1);				if (iCheckWordLen-ucs4_str2_len>=iMaxDistance || 						ucs4_str2_len-iCheckWordLen>=iMaxDistance)					continue;				ucs4_str1 = g_utf8_to_ucs4_fast(sCheck, -1, NULL);				if (iCheckWordLen > ucs4_str2_len)					ucs4_str1[ucs4_str2_len]=0;				unicode_strdown(ucs4_str1);								iDistance = oEditDistance.CalEditDistance(ucs4_str1, ucs4_str2, iMaxDistance);				g_free(ucs4_str1);				if (iDistance<iMaxDistance && iDistance < ucs4_str2_len) {					// when ucs4_str2_len=1,2 we need less fuzzy.					Found = true;					bool bAlreadyInList = false;					int iMaxDistanceAt=0;					for (int j=0; j<reslist_size; j++) {						if (oFuzzystruct[j].pMatchWord && 								strcmp(oFuzzystruct[j].pMatchWord,sCheck)==0 ) {//already in list							bAlreadyInList = true;							break;						}						//find the position,it will certainly be found (include the first time) as iMaxDistance is set by last time.						
if (oFuzzystruct[j].iMatchWordDistance == iMaxDistance ) {							iMaxDistanceAt = j;						}					}					if (!bAlreadyInList) {						if (oFuzzystruct[iMaxDistanceAt].pMatchWord)							g_free(oFuzzystruct[iMaxDistanceAt].pMatchWord);						oFuzzystruct[iMaxDistanceAt].pMatchWord = g_strdup(sCheck);						oFuzzystruct[iMaxDistanceAt].iMatchWordDistance = iDistance;						// calc new iMaxDistance						iMaxDistance = iDistance;						for (int j=0; j<reslist_size; j++) {							if (oFuzzystruct[j].iMatchWordDistance > iMaxDistance)								iMaxDistance = oFuzzystruct[j].iMatchWordDistance;						} // calc new iMaxDistance					}   // add to list				}   // find one      }   // each word    }   // ok for search  }   // each lib  g_free(ucs4_str2);	  if (Found)// sort with distance    std::sort(oFuzzystruct, oFuzzystruct+reslist_size);		for (gint i=0; i<reslist_size; ++i)    reslist[i]=oFuzzystruct[i].pMatchWord;	  return Found;}inline bool less_for_compare(const char *lh, const char *rh) {  return stardict_strcmp(lh, rh)<0;}gint Libs::LookupWithRule(const gchar *word, gchar **ppMatchWord){	  glong aiIndex[MAX_MATCH_ITEM_PER_LIB+1];  gint iMatchCount = 0;  GPatternSpec *pspec = g_pattern_spec_new(word);	  for (std::vector<Dict *>::size_type iLib=0; iLib<oLib.size(); iLib++) {    //if(oLibs.LookdupWordsWithRule(pspec,aiIndex,MAX_MATCH_ITEM_PER_LIB+1-iMatchCount,iLib))     // -iMatchCount,so save time,but may got less result and the word may repeat.        
if (oLib[iLib]->LookupWithRule(pspec,aiIndex, MAX_MATCH_ITEM_PER_LIB+1)) {      if (progress_func)				progress_func();      for (int i=0; aiIndex[i]!=-1; i++) {				const gchar * sMatchWord = poGetWord(aiIndex[i],iLib);				bool bAlreadyInList = false;				for (int j=0; j<iMatchCount; j++) {					if (strcmp(ppMatchWord[j],sMatchWord)==0) {//already in list						bAlreadyInList = true;						break;					}				}				if (!bAlreadyInList)					ppMatchWord[iMatchCount++] = g_strdup(sMatchWord);      }    }  }  g_pattern_spec_free(pspec);	  if (iMatchCount)// sort it.    std::sort(ppMatchWord, ppMatchWord+iMatchCount, less_for_compare); 	  return iMatchCount;}bool Libs::LookupData(const gchar *sWord, std::vector<gchar *> *reslist){	std::vector<std::string> SearchWords;	std::string SearchWord;        const char *p=sWord;        while (*p) {                if (*p=='\\') {                        p++;			switch (*p) {			case ' ':				SearchWord+=' ';				break;			case '\\':				SearchWord+='\\';				break;			case 't':				SearchWord+='\t';				break;			case 'n':				SearchWord+='\n';				break;			default:				SearchWord+=*p;			}                } else if (*p == ' ') {			if (!SearchWord.empty()) {				SearchWords.push_back(SearchWord);				SearchWord.clear();			}		} else {			SearchWord+=*p;		}		p++;        }	if (!SearchWord.empty()) {		SearchWords.push_back(SearchWord);		SearchWord.clear();	}	if (SearchWords.empty())		return false;	guint32 max_size =0;	gchar *origin_data = NULL;	for (std::vector<Dict *>::size_type i=0; i<oLib.size(); ++i) {		if (!oLib[i]->containSearchData())			continue;		if (progress_func)			progress_func();		const gulong iwords = narticles(i);		const gchar *key;		guint32 offset, size;		for (gulong j=0; j<iwords; ++j) {			oLib[i]->get_key_and_data(j, &key, &offset, &size);			if (size>max_size) {				origin_data = (gchar *)g_realloc(origin_data, size);				max_size = size;			}			if (oLib[i]->SearchData(SearchWords, offset, size, origin_data))				reslist[i].push_back(g_strdup(key));		
}	}	g_free(origin_data);	std::vector<Dict *>::size_type i;	for (i=0; i<oLib.size(); ++i)		if (!reslist[i].empty())			break;				return i!=oLib.size();}/**************************************************/query_t analyze_query(const char *s, std::string& res){	if (!s || !*s) {		res="";		return qtSIMPLE;	}	if (*s=='/') {		res=s+1;		return qtFUZZY;	}	if (*s=='|') {		res=s+1;		return qtDATA;	}	bool regexp=false;	const char *p=s;	res="";	for (; *p; res+=*p, ++p) {		if (*p=='\\') {			++p;			if (!*p)				break;			continue;		}		if (*p=='*' || *p=='?')			regexp=true;	}	if (regexp)		return qtREGEXP;	return qtSIMPLE;}
上一页 1 2 3
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -