📄 lib.cpp
字号:
#ifdef HAVE_CONFIG_H# include "config.h"#endif#include <algorithm>#include <cstring>#include <cctype>#include <sys/stat.h>#include <zlib.h>#include <glib/gstdio.h>#include "distance.h"#include "file.hpp"#include "mapfile.hpp"#include "lib.h"// Notice: read src/tools/DICTFILE_FORMAT for the dictionary // file's format information!static inline bool bIsVowel(gchar inputchar){ gchar ch = g_ascii_toupper(inputchar); return( ch=='A' || ch=='E' || ch=='I' || ch=='O' || ch=='U' );}static bool bIsPureEnglish(const gchar *str) { // i think this should work even when it is UTF8 string :). for (int i=0; str[i]!=0; i++) //if(str[i]<0) //if(str[i]<32 || str[i]>126) // tab equal 9,so this is not OK. // Better use isascii() but not str[i]<0 while char is default unsigned in arm if (!isascii(str[i])) return false; return true; }static inline gint stardict_strcmp(const gchar *s1, const gchar *s2) { gint a=g_ascii_strcasecmp(s1, s2); if (a == 0) return strcmp(s1, s2); else return a;}bool DictInfo::load_from_ifo_file(const std::string& ifofilename, bool istreedict){ ifo_file_name=ifofilename; gchar *buffer; if (!g_file_get_contents(ifofilename.c_str(), &buffer, NULL, NULL)) return false; #define TREEDICT_MAGIC_DATA "StarDict's treedict ifo file\nversion=2.4.2\n"#define DICT_MAGIC_DATA "StarDict's dict ifo file\nversion=2.4.2\n" const gchar *magic_data=istreedict ? TREEDICT_MAGIC_DATA : DICT_MAGIC_DATA; if (!g_str_has_prefix(buffer, magic_data)) { g_free(buffer); return false; } gchar *p1,*p2,*p3; p1 = buffer + strlen(magic_data)-1; p2 = strstr(p1,"\nwordcount="); if (!p2) { g_free(buffer); return false; } p3 = strchr(p2+ sizeof("\nwordcount=")-1,'\n'); gchar *tmpstr = (gchar *)g_memdup(p2+sizeof("\nwordcount=")-1, p3-(p2+sizeof("\nwordcount=")-1)+1); tmpstr[p3-(p2+sizeof("\nwordcount=")-1)] = '\0'; wordcount = atol(tmpstr); g_free(tmpstr); if (istreedict) { p2 = strstr(p1,"\ntdxfilesize="); if (!p2) { g_free(buffer); return false; } p3 = strchr(p2+ sizeof("\ntdxfilesize=")-1,'\n'); tmpstr = (gchar *)g_memdup(p2+sizeof("\ntdxfilesize=")-1, p3-(p2+sizeof("\ntdxfilesize=")-1)+1); tmpstr[p3-(p2+sizeof("\ntdxfilesize=")-1)] = '\0'; index_file_size = atol(tmpstr); g_free(tmpstr); } else { p2 = strstr(p1,"\nidxfilesize="); if (!p2) { g_free(buffer); return false; } p3 = strchr(p2+ sizeof("\nidxfilesize=")-1,'\n'); tmpstr = (gchar *)g_memdup(p2+sizeof("\nidxfilesize=")-1, p3-(p2+sizeof("\nidxfilesize=")-1)+1); tmpstr[p3-(p2+sizeof("\nidxfilesize=")-1)] = '\0'; index_file_size = atol(tmpstr); g_free(tmpstr); } p2 = strstr(p1,"\nbookname="); if (!p2) { g_free(buffer); return false; } p2 = p2 + sizeof("\nbookname=") -1; p3 = strchr(p2, '\n'); bookname.assign(p2, p3-p2); p2 = strstr(p1,"\nauthor="); if (p2) { p2 = p2 + sizeof("\nauthor=") -1; p3 = strchr(p2, '\n'); author.assign(p2, p3-p2); } p2 = strstr(p1,"\nemail="); if (p2) { p2 = p2 + sizeof("\nemail=") -1; p3 = strchr(p2, '\n'); email.assign(p2, p3-p2); } p2 = strstr(p1,"\nwebsite="); if (p2) { p2 = p2 + sizeof("\nwebsite=") -1; p3 = strchr(p2, '\n'); website.assign(p2, p3-p2); } p2 = strstr(p1,"\ndate="); if (p2) { p2 = p2 + sizeof("\ndate=") -1; p3 = strchr(p2, '\n'); date.assign(p2, p3-p2); } p2 = strstr(p1,"\ndescription="); if (p2) { p2 = p2 + sizeof("\ndescription=")-1; p3 = strchr(p2, '\n'); description.assign(p2, p3-p2); } p2 = strstr(p1,"\nsametypesequence="); if (p2) { p2+=sizeof("\nsametypesequence=")-1; p3 = strchr(p2, '\n'); sametypesequence.assign(p2, p3-p2); } g_free(buffer); return true; }//===================================================================DictBase::DictBase(){ dictfile = NULL; cache_cur =0;}DictBase::~DictBase(){ if (dictfile) fclose(dictfile);}gchar* DictBase::GetWordData(guint32 idxitem_offset, guint32 idxitem_size){ for (int i=0; i<WORDDATA_CACHE_NUM; i++) if (cache[i].data && cache[i].offset == idxitem_offset) return cache[i].data; if (dictfile) fseek(dictfile, idxitem_offset, SEEK_SET); gchar *data; if (!sametypesequence.empty()) { gchar *origin_data = (gchar *)g_malloc(idxitem_size); if (dictfile) fread(origin_data, idxitem_size, 1, dictfile); else dictdzfile->read(origin_data, idxitem_offset, idxitem_size); guint32 data_size; gint sametypesequence_len = sametypesequence.length(); //there have sametypesequence_len char being omitted. data_size = idxitem_size + sizeof(guint32) + sametypesequence_len; //if the last item's size is determined by the end up '\0',then +=sizeof(gchar); //if the last item's size is determined by the head guint32 type data,then +=sizeof(guint32); switch (sametypesequence[sametypesequence_len-1]) { case 'm': case 't': case 'y': case 'l': case 'g': case 'x': data_size += sizeof(gchar); break; case 'W': case 'P': data_size += sizeof(guint32); break; default: if (g_ascii_isupper(sametypesequence[sametypesequence_len-1])) data_size += sizeof(guint32); else data_size += sizeof(gchar); break; } data = (gchar *)g_malloc(data_size); gchar *p1,*p2; p1 = data + sizeof(guint32); p2 = origin_data; guint32 sec_size; //copy the head items. for (int i=0; i<sametypesequence_len-1; i++) { *p1=sametypesequence[i]; p1+=sizeof(gchar); switch (sametypesequence[i]) { case 'm': case 't': case 'y': case 'l': case 'g': case 'x': sec_size = strlen(p2)+1; memcpy(p1, p2, sec_size); p1+=sec_size; p2+=sec_size; break; case 'W': case 'P': sec_size = *reinterpret_cast<guint32 *>(p2); sec_size += sizeof(guint32); memcpy(p1, p2, sec_size); p1+=sec_size; p2+=sec_size; break; default: if (g_ascii_isupper(sametypesequence[i])) { sec_size = *reinterpret_cast<guint32 *>(p2); sec_size += sizeof(guint32); } else { sec_size = strlen(p2)+1; } memcpy(p1, p2, sec_size); p1+=sec_size; p2+=sec_size; break; } } //calculate the last item 's size. sec_size = idxitem_size - (p2-origin_data); *p1=sametypesequence[sametypesequence_len-1]; p1+=sizeof(gchar); switch (sametypesequence[sametypesequence_len-1]) { case 'm': case 't': case 'y': case 'l': case 'g': case 'x': memcpy(p1, p2, sec_size); p1 += sec_size; *p1='\0';//add the end up '\0'; break; case 'W': case 'P': *reinterpret_cast<guint32 *>(p1)=sec_size; p1 += sizeof(guint32); memcpy(p1, p2, sec_size); break; default: if (g_ascii_isupper(sametypesequence[sametypesequence_len-1])) { *reinterpret_cast<guint32 *>(p1)=sec_size; p1 += sizeof(guint32); memcpy(p1, p2, sec_size); } else { memcpy(p1, p2, sec_size); p1 += sec_size; *p1='\0'; } break; } g_free(origin_data); *reinterpret_cast<guint32 *>(data)=data_size; } else { data = (gchar *)g_malloc(idxitem_size + sizeof(guint32)); if (dictfile) fread(data+sizeof(guint32), idxitem_size, 1, dictfile); else dictdzfile->read(data+sizeof(guint32), idxitem_offset, idxitem_size); *reinterpret_cast<guint32 *>(data)=idxitem_size+sizeof(guint32); } g_free(cache[cache_cur].data); cache[cache_cur].data = data; cache[cache_cur].offset = idxitem_offset; cache_cur++; if (cache_cur==WORDDATA_CACHE_NUM) cache_cur = 0; return data;}inline bool DictBase::containSearchData(){ if (sametypesequence.empty()) return true; return sametypesequence.find_first_of("mlgxty")!=std::string::npos;}bool DictBase::SearchData(std::vector<std::string> &SearchWords, guint32 idxitem_offset, guint32 idxitem_size, gchar *origin_data){ int nWord = SearchWords.size(); std::vector<bool> WordFind(nWord, false); int nfound=0; if (dictfile) fseek(dictfile, idxitem_offset, SEEK_SET); if (dictfile) fread(origin_data, idxitem_size, 1, dictfile); else dictdzfile->read(origin_data, idxitem_offset, idxitem_size); gchar *p = origin_data; guint32 sec_size; int j; if (!sametypesequence.empty()) { gint sametypesequence_len = sametypesequence.length(); for (int i=0; i<sametypesequence_len-1; i++) { switch (sametypesequence[i]) { case 'm': case 't': case 'y': case 'l': case 'g': case 'x': for (j=0; j<nWord; j++) if (!WordFind[j] && strstr(p, SearchWords[j].c_str())) { WordFind[j] = true; ++nfound; } if (nfound==nWord) return true; sec_size = strlen(p)+1; p+=sec_size; break; default: if (g_ascii_isupper(sametypesequence[i])) { sec_size = *reinterpret_cast<guint32 *>(p); sec_size += sizeof(guint32); } else { sec_size = strlen(p)+1; } p+=sec_size; } } switch (sametypesequence[sametypesequence_len-1]) { case 'm': case 't': case 'y': case 'l': case 'g': case 'x': sec_size = idxitem_size - (p-origin_data); for (j=0; j<nWord; j++) if (!WordFind[j] && g_strstr_len(p, sec_size, SearchWords[j].c_str())) { WordFind[j] = true; ++nfound; } if (nfound==nWord) return true; break; } } else { while (guint32(p - origin_data)<idxitem_size) { switch (*p) { case 'm': case 't': case 'y': case 'l': case 'g': case 'x': for (j=0; j<nWord; j++) if (!WordFind[j] && strstr(p, SearchWords[j].c_str())) { WordFind[j] = true; ++nfound; } if (nfound==nWord) return true; sec_size = strlen(p)+1; p+=sec_size; break; default: if (g_ascii_isupper(*p)) { sec_size = *reinterpret_cast<guint32 *>(p); sec_size += sizeof(guint32); } else { sec_size = strlen(p)+1; } p+=sec_size; } } } return false;}class offset_index : public index_file {public: offset_index() : idxfile(NULL) {} ~offset_index(); bool load(const std::string& url, gulong wc, gulong fsize); const gchar *get_key(glong idx); void get_data(glong idx); const gchar *get_key_and_data(glong idx); bool lookup(const char *str, glong &idx);private: static const gint ENTR_PER_PAGE=32; static const char *CACHE_MAGIC; std::vector<guint32> wordoffset; FILE *idxfile; gulong wordcount; gchar wordentry_buf[256+sizeof(guint32)*2]; // The length of "word_str" should be less than 256. See src/tools/DICTFILE_FORMAT. struct index_entry { glong idx; std::string keystr; void assign(glong i, const std::string& str) { idx=i; keystr.assign(str); } }; index_entry first, last, middle, real_last; struct page_entry { gchar *keystr; guint32 off, size; }; std::vector<gchar> page_data; struct page_t { glong idx; page_entry entries[ENTR_PER_PAGE]; page_t(): idx(-1) {} void fill(gchar *data, gint nent, glong idx_); } page; gulong load_page(glong page_idx); const gchar *read_first_on_page_key(glong page_idx); const gchar *get_first_on_page_key(glong page_idx); bool load_cache(const std::string& url); bool save_cache(const std::string& url); static strlist_t get_cache_variant(const std::string& url);};const char *offset_index::CACHE_MAGIC="StarDict's Cache, Version: 0.1";class wordlist_index : public index_file {public: wordlist_index() : idxdatabuf(NULL) {} ~wordlist_index(); bool load(const std::string& url, gulong wc, gulong fsize); const gchar *get_key(glong idx); void get_data(glong idx); const gchar *get_key_and_data(glong idx); bool lookup(const char *str, glong &idx);private: gchar *idxdatabuf; std::vector<gchar *> wordlist;};void offset_index::page_t::fill(gchar *data, gint nent, glong idx_) { idx=idx_; gchar *p=data; glong len; for (gint i=0; i<nent; ++i) { entries[i].keystr=p; len=strlen(p); p+=len+1; entries[i].off=g_ntohl(*reinterpret_cast<guint32 *>(p)); p+=sizeof(guint32); entries[i].size=g_ntohl(*reinterpret_cast<guint32 *>(p)); p+=sizeof(guint32); }}offset_index::~offset_index(){ if (idxfile) fclose(idxfile);}inline const gchar *offset_index::read_first_on_page_key(glong page_idx){ fseek(idxfile, wordoffset[page_idx], SEEK_SET); guint32 page_size=wordoffset[page_idx+1]-wordoffset[page_idx]; fread(wordentry_buf, std::min(sizeof(wordentry_buf), page_size), 1, idxfile); //TODO: check returned values, deal with word entry that strlen>255. return wordentry_buf;}inline const gchar *offset_index::get_first_on_page_key(glong page_idx){ if (page_idx<middle.idx) { if (page_idx==first.idx) return first.keystr.c_str(); return read_first_on_page_key(page_idx); } else if (page_idx>middle.idx) { if (page_idx==last.idx) return last.keystr.c_str(); return read_first_on_page_key(page_idx); } else return middle.keystr.c_str();}bool offset_index::load_cache(const std::string& url){ strlist_t vars=get_cache_variant(url); for (strlist_t::const_iterator it=vars.begin(); it!=vars.end(); ++it) { struct stat idxstat, cachestat; if (g_stat(url.c_str(), &idxstat)!=0 || g_stat(it->c_str(), &cachestat)!=0) continue; if (cachestat.st_mtime<idxstat.st_mtime) continue; MapFile mf; if (!mf.open(it->c_str(), cachestat.st_size)) continue; if (strncmp(mf.begin(), CACHE_MAGIC, strlen(CACHE_MAGIC))!=0) continue; memcpy(&wordoffset[0], mf.begin()+strlen(CACHE_MAGIC), wordoffset.size()*sizeof(wordoffset[0])); return true; } return false;}strlist_t offset_index::get_cache_variant(const std::string& url){ strlist_t res; res.push_back(url+".oft"); if (!g_file_test(g_get_user_cache_dir(), G_FILE_TEST_EXISTS) && g_mkdir(g_get_user_cache_dir(), 0700)==-1) return res;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -