📄 lib.cpp

📁 使用Qt4编写的星际译王（stardict）
💻 CPP
📖 第 1 页 / 共 3 页
字号:
1 2 3 下一页
#ifdef HAVE_CONFIG_H#  include "config.h"#endif#include <algorithm>#include <cstring>#include <cctype>#include <sys/stat.h>#include <zlib.h>#include <glib/gstdio.h>#include "distance.h"#include "file.hpp"#include "mapfile.hpp"#include "lib.h"// Notice: read src/tools/DICTFILE_FORMAT for the dictionary // file's format information!static inline bool bIsVowel(gchar inputchar){  gchar ch = g_ascii_toupper(inputchar);  return( ch=='A' || ch=='E' || ch=='I' || ch=='O' || ch=='U' );}static bool bIsPureEnglish(const gchar *str) {   // i think this should work even when it is UTF8 string :).  for (int i=0; str[i]!=0; i++)     //if(str[i]<0)    //if(str[i]<32 || str[i]>126) // tab equal 9,so this is not OK.    // Better use isascii() but not str[i]<0 while char is default unsigned in arm    if (!isascii(str[i]))             return false;              return true;	}static inline gint stardict_strcmp(const gchar *s1, const gchar *s2) {  gint a=g_ascii_strcasecmp(s1, s2);  if (a == 0)    return strcmp(s1, s2);  else    return a;}bool DictInfo::load_from_ifo_file(const std::string& ifofilename,																	bool istreedict){  ifo_file_name=ifofilename;  gchar *buffer;  if (!g_file_get_contents(ifofilename.c_str(), &buffer, NULL, NULL))    return false;  #define TREEDICT_MAGIC_DATA "StarDict's treedict ifo file\nversion=2.4.2\n"#define DICT_MAGIC_DATA "StarDict's dict ifo file\nversion=2.4.2\n"  const gchar *magic_data=istreedict ? 
TREEDICT_MAGIC_DATA : DICT_MAGIC_DATA;  if (!g_str_has_prefix(buffer, magic_data)) {    g_free(buffer);    return false;  }  gchar *p1,*p2,*p3;    p1 = buffer + strlen(magic_data)-1;  p2 = strstr(p1,"\nwordcount=");  if (!p2) {    g_free(buffer);    return false;  }    p3 = strchr(p2+ sizeof("\nwordcount=")-1,'\n');  gchar *tmpstr = (gchar *)g_memdup(p2+sizeof("\nwordcount=")-1, p3-(p2+sizeof("\nwordcount=")-1)+1);  tmpstr[p3-(p2+sizeof("\nwordcount=")-1)] = '\0';  wordcount = atol(tmpstr);  g_free(tmpstr);  if (istreedict) {    p2 = strstr(p1,"\ntdxfilesize=");    if (!p2) {      g_free(buffer);      return false;    }    p3 = strchr(p2+ sizeof("\ntdxfilesize=")-1,'\n');    tmpstr = (gchar *)g_memdup(p2+sizeof("\ntdxfilesize=")-1, p3-(p2+sizeof("\ntdxfilesize=")-1)+1);    tmpstr[p3-(p2+sizeof("\ntdxfilesize=")-1)] = '\0';    index_file_size = atol(tmpstr);    g_free(tmpstr);  } else {      p2 = strstr(p1,"\nidxfilesize=");    if (!p2) {      g_free(buffer);      return false;    }        p3 = strchr(p2+ sizeof("\nidxfilesize=")-1,'\n');    tmpstr = (gchar *)g_memdup(p2+sizeof("\nidxfilesize=")-1, p3-(p2+sizeof("\nidxfilesize=")-1)+1);    tmpstr[p3-(p2+sizeof("\nidxfilesize=")-1)] = '\0';    index_file_size = atol(tmpstr);    g_free(tmpstr);  }	  p2 = strstr(p1,"\nbookname=");  if (!p2) {    g_free(buffer);    return false;  }  p2 = p2 + sizeof("\nbookname=") -1;  p3 = strchr(p2, '\n');  bookname.assign(p2, p3-p2);  p2 = strstr(p1,"\nauthor=");  if (p2) {    p2 = p2 + sizeof("\nauthor=") -1;    p3 = strchr(p2, '\n');    author.assign(p2, p3-p2);  }  p2 = strstr(p1,"\nemail=");  if (p2) {    p2 = p2 + sizeof("\nemail=") -1;    p3 = strchr(p2, '\n');    email.assign(p2, p3-p2);  }  p2 = strstr(p1,"\nwebsite=");  if (p2) {    p2 = p2 + sizeof("\nwebsite=") -1;    p3 = strchr(p2, '\n');    website.assign(p2, p3-p2);  }  p2 = strstr(p1,"\ndate=");  if (p2) {    p2 = p2 + sizeof("\ndate=") -1;    p3 = strchr(p2, '\n');    date.assign(p2, p3-p2);  }  p2 = 
strstr(p1,"\ndescription=");  if (p2) {    p2 = p2 + sizeof("\ndescription=")-1;    p3 = strchr(p2, '\n');    description.assign(p2, p3-p2);  }  p2 = strstr(p1,"\nsametypesequence=");  if (p2) {		    p2+=sizeof("\nsametypesequence=")-1;    p3 = strchr(p2, '\n');    sametypesequence.assign(p2, p3-p2);  }  g_free(buffer);  return true;		}//===================================================================DictBase::DictBase(){		dictfile = NULL;		cache_cur =0;}DictBase::~DictBase(){		if (dictfile)		fclose(dictfile);}gchar* DictBase::GetWordData(guint32 idxitem_offset, guint32 idxitem_size){  for (int i=0; i<WORDDATA_CACHE_NUM; i++)	    if (cache[i].data && cache[i].offset == idxitem_offset)      return cache[i].data;    if (dictfile)    fseek(dictfile, idxitem_offset, SEEK_SET);    gchar *data;  if (!sametypesequence.empty()) {    gchar *origin_data = (gchar *)g_malloc(idxitem_size);        if (dictfile)      fread(origin_data, idxitem_size, 1, dictfile);    else      dictdzfile->read(origin_data, idxitem_offset, idxitem_size);        guint32 data_size;    gint sametypesequence_len = sametypesequence.length();    //there have sametypesequence_len char being omitted.    
data_size = idxitem_size + sizeof(guint32) + sametypesequence_len;    //if the last item's size is determined by the end up '\0',then +=sizeof(gchar);    //if the last item's size is determined by the head guint32 type data,then +=sizeof(guint32);    switch (sametypesequence[sametypesequence_len-1]) {    case 'm':    case 't':    case 'y':    case 'l':    case 'g':    case 'x':      data_size += sizeof(gchar);      break;    case 'W':    case 'P':      data_size += sizeof(guint32);      break;    default:      if (g_ascii_isupper(sametypesequence[sametypesequence_len-1]))        data_size += sizeof(guint32);      else        data_size += sizeof(gchar);      break;    }			    data = (gchar *)g_malloc(data_size);    gchar *p1,*p2;    p1 = data + sizeof(guint32);    p2 = origin_data;    guint32 sec_size;    //copy the head items.    for (int i=0; i<sametypesequence_len-1; i++) {      *p1=sametypesequence[i];      p1+=sizeof(gchar);      switch (sametypesequence[i]) {      case 'm':      case 't':      case 'y':      case 'l':      case 'g':      case 'x':				sec_size = strlen(p2)+1;				memcpy(p1, p2, sec_size);				p1+=sec_size;				p2+=sec_size;				break;      case 'W':      case 'P':				sec_size = *reinterpret_cast<guint32 *>(p2);				sec_size += sizeof(guint32);				memcpy(p1, p2, sec_size);				p1+=sec_size;				p2+=sec_size;				break;      default:				if (g_ascii_isupper(sametypesequence[i])) {					sec_size = *reinterpret_cast<guint32 *>(p2);					sec_size += sizeof(guint32);				} else {					sec_size = strlen(p2)+1;				}				memcpy(p1, p2, sec_size);				p1+=sec_size;				p2+=sec_size;				break;      }							    }	    //calculate the last item 's size.    
sec_size = idxitem_size - (p2-origin_data);    *p1=sametypesequence[sametypesequence_len-1];    p1+=sizeof(gchar);    switch (sametypesequence[sametypesequence_len-1]) {    case 'm':    case 't':    case 'y':    case 'l':    case 'g':    case 'x':      memcpy(p1, p2, sec_size);      p1 += sec_size;				      *p1='\0';//add the end up '\0';      break;    case 'W':    case 'P':      *reinterpret_cast<guint32 *>(p1)=sec_size;      p1 += sizeof(guint32);      memcpy(p1, p2, sec_size);      break;    default:      if (g_ascii_isupper(sametypesequence[sametypesequence_len-1])) {        *reinterpret_cast<guint32 *>(p1)=sec_size;        p1 += sizeof(guint32);        memcpy(p1, p2, sec_size);      } else {        memcpy(p1, p2, sec_size);        p1 += sec_size;        *p1='\0';      }      break;    }    g_free(origin_data);		    *reinterpret_cast<guint32 *>(data)=data_size;  } else {		    data = (gchar *)g_malloc(idxitem_size + sizeof(guint32));    if (dictfile)      fread(data+sizeof(guint32), idxitem_size, 1, dictfile);		    else      dictdzfile->read(data+sizeof(guint32), idxitem_offset, idxitem_size);    *reinterpret_cast<guint32 *>(data)=idxitem_size+sizeof(guint32);  }	  g_free(cache[cache_cur].data);    cache[cache_cur].data = data;  cache[cache_cur].offset = idxitem_offset;  cache_cur++;  if (cache_cur==WORDDATA_CACHE_NUM)    cache_cur = 0;  return data;}inline bool DictBase::containSearchData(){	if (sametypesequence.empty())		return true;	return sametypesequence.find_first_of("mlgxty")!=std::string::npos;}bool DictBase::SearchData(std::vector<std::string> &SearchWords, guint32 idxitem_offset, guint32 idxitem_size, gchar *origin_data){	int nWord = SearchWords.size();	std::vector<bool> WordFind(nWord, false);	int nfound=0;	if (dictfile)		fseek(dictfile, idxitem_offset, SEEK_SET);	if (dictfile)		fread(origin_data, idxitem_size, 1, dictfile);	else		dictdzfile->read(origin_data, idxitem_offset, idxitem_size);	gchar *p = origin_data;	guint32 sec_size;	int j;	if 
(!sametypesequence.empty()) {		gint sametypesequence_len = sametypesequence.length();		for (int i=0; i<sametypesequence_len-1; i++) {			switch (sametypesequence[i]) {			case 'm':			case 't':			case 'y':			case 'l':			case 'g':			case 'x':				for (j=0; j<nWord; j++)					if (!WordFind[j] && strstr(p, SearchWords[j].c_str())) {							WordFind[j] = true;							++nfound;					}								if (nfound==nWord)					return true;				sec_size = strlen(p)+1;				p+=sec_size;				break;			default:				if (g_ascii_isupper(sametypesequence[i])) {					sec_size = *reinterpret_cast<guint32 *>(p);					sec_size += sizeof(guint32);				} else {					sec_size = strlen(p)+1;				}				p+=sec_size;			}		}		switch (sametypesequence[sametypesequence_len-1]) {		case 'm':		case 't':		case 'y':		case 'l':		case 'g':		case 'x':			sec_size = idxitem_size - (p-origin_data);			for (j=0; j<nWord; j++)				if (!WordFind[j] && 				    g_strstr_len(p, sec_size, SearchWords[j].c_str())) {						WordFind[j] = true;						++nfound;				}						if (nfound==nWord)				return true;			break;		}	} else {		while (guint32(p - origin_data)<idxitem_size) {			switch (*p) {			case 'm':			case 't':			case 'y':			case 'l':			case 'g':			case 'x':				for (j=0; j<nWord; j++)					if (!WordFind[j] && strstr(p, SearchWords[j].c_str())) {							WordFind[j] = true;							++nfound;					}								if (nfound==nWord)					return true;				sec_size = strlen(p)+1;				p+=sec_size;				break;                        default:                                if (g_ascii_isupper(*p)) {                                        sec_size = *reinterpret_cast<guint32 *>(p);					sec_size += sizeof(guint32);                                } else {                                        sec_size = strlen(p)+1;                                }                                p+=sec_size;			}		}	}	return false;}class offset_index : public index_file {public:	offset_index() : idxfile(NULL) {}	~offset_index();	bool load(const std::string& url, gulong wc, gulong fsize);	
const gchar *get_key(glong idx);	void get_data(glong idx);	const gchar *get_key_and_data(glong idx);	bool lookup(const char *str, glong &idx);private:	static const gint ENTR_PER_PAGE=32;	static const char *CACHE_MAGIC;	std::vector<guint32> wordoffset;	FILE *idxfile;	gulong wordcount;	gchar wordentry_buf[256+sizeof(guint32)*2]; // The length of "word_str" should be less than 256. See src/tools/DICTFILE_FORMAT.	struct index_entry {		glong idx;		std::string keystr;		void assign(glong i, const std::string& str) {			idx=i;			keystr.assign(str);		}	};	index_entry first, last, middle, real_last;	struct page_entry {		gchar *keystr;		guint32 off, size;	};	std::vector<gchar> page_data;	struct page_t {		glong idx;		page_entry entries[ENTR_PER_PAGE];		page_t(): idx(-1) {}		void fill(gchar *data, gint nent, glong idx_);	} page;	gulong load_page(glong page_idx);	const gchar *read_first_on_page_key(glong page_idx);	const gchar *get_first_on_page_key(glong page_idx);	bool load_cache(const std::string& url);	bool save_cache(const std::string& url);	static strlist_t get_cache_variant(const std::string& url);};const char *offset_index::CACHE_MAGIC="StarDict's Cache, Version: 0.1";class wordlist_index : public index_file {public:	wordlist_index() : idxdatabuf(NULL)	{}	~wordlist_index();	bool load(const std::string& url, gulong wc, gulong fsize);	const gchar *get_key(glong idx);	void get_data(glong idx);	const gchar *get_key_and_data(glong idx);	bool lookup(const char *str, glong &idx);private:	gchar *idxdatabuf;	std::vector<gchar *> wordlist;};void offset_index::page_t::fill(gchar *data, gint nent, glong idx_) {	idx=idx_;	gchar *p=data;	glong len;	for (gint i=0; i<nent; ++i) {		entries[i].keystr=p;		len=strlen(p);		p+=len+1;		entries[i].off=g_ntohl(*reinterpret_cast<guint32 *>(p));		p+=sizeof(guint32);		entries[i].size=g_ntohl(*reinterpret_cast<guint32 *>(p));		p+=sizeof(guint32);	}}offset_index::~offset_index(){	if (idxfile)		fclose(idxfile);}inline const gchar 
*offset_index::read_first_on_page_key(glong page_idx){	fseek(idxfile, wordoffset[page_idx], SEEK_SET);	guint32 page_size=wordoffset[page_idx+1]-wordoffset[page_idx];	fread(wordentry_buf, std::min(sizeof(wordentry_buf), page_size), 1, idxfile); //TODO: check returned values, deal with word entry that strlen>255.	return wordentry_buf;}inline const gchar *offset_index::get_first_on_page_key(glong page_idx){	if (page_idx<middle.idx) {		if (page_idx==first.idx)			return first.keystr.c_str();		return read_first_on_page_key(page_idx);	} else if (page_idx>middle.idx) {		if (page_idx==last.idx)			return last.keystr.c_str();		return read_first_on_page_key(page_idx);	} else			return middle.keystr.c_str();}bool offset_index::load_cache(const std::string& url){	strlist_t vars=get_cache_variant(url);	for (strlist_t::const_iterator it=vars.begin(); it!=vars.end(); ++it) {		struct stat idxstat, cachestat;		if (g_stat(url.c_str(), &idxstat)!=0 ||                    g_stat(it->c_str(), &cachestat)!=0)			continue;		if (cachestat.st_mtime<idxstat.st_mtime)			continue;				MapFile mf;		if (!mf.open(it->c_str(), cachestat.st_size))			continue;		if (strncmp(mf.begin(), CACHE_MAGIC, strlen(CACHE_MAGIC))!=0)			continue;		memcpy(&wordoffset[0], mf.begin()+strlen(CACHE_MAGIC), wordoffset.size()*sizeof(wordoffset[0]));		return true;	}	return false;}strlist_t offset_index::get_cache_variant(const std::string& url){	strlist_t res;	res.push_back(url+".oft");	if (!g_file_test(g_get_user_cache_dir(), G_FILE_TEST_EXISTS) &&	    g_mkdir(g_get_user_cache_dir(), 0700)==-1)		return res;
1 2 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -