📄 dictionary.cpp
字号:
return true;
}
}
return false;
}
mid=(low+high)>>1;
int cmp=compare_bufs(sortkey,m_blocks[mid].key);
if (cmp<0)
high=mid-1;
else {
if (mid==(int)m_numblk-1) // last block, stop search
break;
cmp=compare_bufs(sortkey,m_blocks[mid+1].key);
if (cmp<0) // found it
break;
low=mid+1;
}
}
int blk=mid;
if (!GetBlk(blk))
return false;
low=0;
high=m_blocks[blk].numwords-1;
for (int nj=0;;++nj) {
if (nj>m_blocks[blk].numwords) // prevent loops on invalid data
return false;
if (low>high) { // no such word in this block
if (!found) // shortcut
return false;
int idx=m_blocks[blk].wordidx+high;
if (idx<0)
idx=0;
if (idx<(int)m_numwords-1) {
if (!GetWordImp(idx))
return false;
int cur=idx-m_blocks[m_curblk].wordidx;
int l1=compare_buf_str_len(sortkey,(const char *)m_kindex[cur]);
if (++cur>=m_blocks[m_curblk].numwords) {
if (!GetWordImp(idx+1))
return false;
cur=0;
}
if (compare_buf_str_len(sortkey,(const char *)m_kindex[cur])>l1)
++idx;
}
index=idx;
*found=2;
return true;
}
mid=(low+high)>>1;
int cmp=compare_buf_str(sortkey,(const char *)m_kindex[mid]);
if (cmp<0)
high=mid-1;
else if (cmp>0)
low=mid+1;
else { // found
index=m_blocks[blk].wordidx+mid;
if (found)
*found=1;
return true;
}
}
}
// English suffix-stripping rules used by Dict::Find.
//
// pattern     - the word ending, stored REVERSED because it is compared
//               against the word from its last character backwards.
//               A '.' stands for "the two characters at this position of the
//               word are the same" (a doubled letter, as in "stopped").
// replacement - what to append after the ending has been removed:
//               NULL  : nothing, just strip the ending;
//               L"1"  : append one copy of the doubled letter;
//               other : append the text as written (not reversed).
//
// The table is terminated by an entry whose pattern is NULL.
static struct {
  const wchar_t *pattern;
  const wchar_t *replacement;
} endings[]={
  // plural / third person: -s, -es, -ies
  { L"s", NULL },
  { L"se", NULL },
  { L"sei", L"y" },
  // past tense: -ed
  { L"de", NULL },
  { L"de", L"e" },
  { L"dei", L"y" },
  { L"de.", L"1" },
  // gerund: -ing
  { L"gni", NULL },
  { L"gni", L"e" },
  { L"gniy", L"ie" },
  { L"gni.", L"1" },
  // comparative / agent: -er
  { L"re", NULL },
  { L"re", L"e" },
  { L"rei", L"y" },
  // The comma between the two literals matters: without it they concatenate
  // into the single pattern L"re.1" with a NULL replacement.
  { L"re.", L"1" },
  // superlative: -est
  { L"tse", NULL },
  { L"tse", L"e" },
  { L"tsei", L"y" },
  { L"tse.", L"1" },
  { NULL, NULL }
};
bool Dict::Find(const wchar_t *word,int& index,int& found) {
if (PRIMARYLANGID(m_lcid)!=LANG_ENGLISH)
return FindImp(word,index,&found);
// handle english word endings
wchar_t tmpbuf[64];
int wordlen=wcslen(word);
if (wordlen>=sizeof(tmpbuf)/sizeof(tmpbuf[0]))
return FindImp(word,index,&found);
// check exact match
if (FindImp(word,index,NULL)) {
found=1;
return true;
}
for (int ending=0;endings[ending].pattern;++ending) {
const wchar_t *pattern=endings[ending].pattern;
for (int i=0;i<wordlen;++i) {
if (pattern[i]==L'.') { // doubled char here
if (i+1<wordlen && word[wordlen-i-1]==word[wordlen-i-2]) {
i+=2;
goto match;
}
break;
}
if (!pattern[i]) { // end of pattern, matched
match:
int k;
for (k=0;k<wordlen-i;++k)
tmpbuf[k]=word[k];
const wchar_t *replacement=endings[ending].replacement;
if (replacement) {
if (replacement[0]==L'1')
tmpbuf[k++]=word[wordlen-i];
else
while (*replacement)
tmpbuf[k++]=*replacement++;
}
tmpbuf[k++]=L'\0';
// relookup
if (FindImp(tmpbuf,index,NULL)) {
found=1;
return true;
}
break;
}
if (pattern[i]!=word[wordlen-i-1])
break;
}
}
// still no match, proceed as usual
return FindImp(word,index,&found);
}
// Map a paragraph number to the index of the word that contains it.
//
// para - paragraph number; must lie in [0, m_numpara).
//
// Returns the word index, or 0 when `para` is out of range, the block cannot
// be loaded, or the index data is inconsistent. Note that 0 is therefore
// returned both for failure and for the first word.
int Dict::GetWordFromP(int para) {
  if (para<0 || para>=m_numpara)
    return 0;

  // Stage 1: binary search for the block whose paragraph range
  // [paraidx, paraidx+numpara) contains `para`.
  int low=0;
  int high=m_numblk-1;
  int mid;
  for (int ni=0;;++ni) {
    if (ni>(int)m_numblk) // prevent loops on unsorted invalid data
      return 0;
    if (low>high)
      return 0;
    mid=(low+high)>>1;
    if (para<m_blocks[mid].paraidx)
      high=mid-1;
    else if (para>=m_blocks[mid].paraidx+m_blocks[mid].numpara)
      low=mid+1;
    else
      break;
  }
  if (!GetBlk(mid))
    return 0;

  // Stage 2: binary search inside the block for the word w with
  // m_pindex[w] <= para < m_pindex[w+1] (block-relative paragraph numbers).
  // The upper bound is the last word, numwords-1: the loop reads
  // m_pindex[mid+1], so letting mid reach numwords would read one entry past
  // the slot that follows the last word.
  // NOTE(review): assumes m_pindex holds numwords+1 entries (one sentinel
  // after the last word) - confirm against the block loader.
  low=0;
  high=m_blocks[m_curblk].numwords-1;
  para-=m_blocks[m_curblk].paraidx;
  for (int ni=0;;++ni) {
    if (ni>m_blocks[m_curblk].numwords) // same loop guard as above
      return 0;
    if (low>high)
      return 0;
    mid=(low+high)>>1;
    if (para<(int)m_pindex[mid])
      high=mid-1;
    else if (para>=(int)m_pindex[mid+1])
      low=mid+1;
    else
      break;
  }
  return m_blocks[m_curblk].wordidx+mid;
}
// Return the number of the first paragraph of word `index`, or 0 when
// GetWordImp fails for that index.
int Dict::GetStartPofWord(int index) {
  if (!GetWordImp(index))
    return 0;
  // m_pindex is indexed by the word's position inside the current block and
  // is added to the block's starting paragraph.
  return m_blocks[m_curblk].paraidx
    +m_pindex[index-m_blocks[m_curblk].wordidx];
}
// File-local object whose constructor hands the "DICq" signature and a Dict
// factory to the IDict::DictInit base.
static class SimpleDictInit: public IDict::DictInit {
  // Factory passed to the base class: wraps the file in a Dict.
  static IDict *make_dict(RFile *fp) {
    return new Dict(fp);
  }

public:
  SimpleDictInit()
    : DictInit("DICq",make_dict)
  {
  }
} g_dict_init;
// Open `filename` and create the dictionary implementation whose registered
// signature matches the first SIGSIZE bytes of the file.
//
// filename - path of the dictionary file.
// errmsg   - optional; when non-NULL it receives a description of the
//            failure.
//
// Returns the new dictionary, or NULL when the file cannot be opened, is too
// short to hold a signature, matches no registered signature, or the created
// object reports !Valid().
IDict *IDict::Create(const CString& filename,CString *errmsg) {
  kilo::auto_ptr<RFile> fp(new RFile(filename));
  if (!fp->Reopen()) {
    if (errmsg)
      *errmsg=FileExceptionInfo(filename,GetLastError());
    return NULL;
  }
  BYTE sig[SIGSIZE];
  if (fp->read2(sig,SIGSIZE)!=SIGSIZE) {
    if (errmsg)
      errmsg->Format(_T("%s: Invalid dictionary file"),(const TCHAR*)filename);
    return NULL;
  }
  // Walk the list of registered formats looking for a signature match.
  IDict *d=NULL;
  for (DictInit *di=DictInit::m_head;di;di=di->m_next)
    if (memcmp(sig,di->m_sig,SIGSIZE)==0) {
      // the file object is released from fp and handed to the factory
      d=di->m_create(fp.release());
      break;
    }
  if (d && d->Valid())
    return d;
  // No match (d is NULL, delete is a no-op) or the dictionary is not valid.
  delete d;
  if (errmsg)
    errmsg->Format(_T("%s: Invalid dictionary file"),(const TCHAR*)filename);
  return NULL;
}
// Head of the singly linked list of DictInit records.
IDict::DictInit *IDict::DictInit::m_head;

// Store a format signature and its factory, then push this record onto the
// front of the list.
//
// sig    - signature bytes; at most sizeof(m_sig) characters are copied.
// create - factory called with the opened file when the signature matches.
IDict::DictInit::DictInit(const char *sig,IDict *(*create)(RFile *)) {
  strncpy(m_sig,sig,sizeof(m_sig));
  m_create=create;
  // link in front of the current head
  m_next=m_head;
  m_head=this;
}
// Extract the text of paragraph `para` from the word that contains it.
//
// para    - paragraph number.
// isfirst - set to true when `para` is the first paragraph of its word.
//
// Returns the paragraph's characters without the terminating L'\n'. If the
// word's text runs out before `para` is reached, the result is empty.
Buffer<wchar_t> DictParser::GetParagraphImp(int para,bool& isfirst) {
  const int widx=m_dict->GetWordFromP(para);
  Buffer<wchar_t> text(m_dict->GetWordW(widx));
  int cur=m_dict->GetStartPofWord(widx);
  isfirst=(cur==para);

  const wchar_t *begin=text;
  const wchar_t *const end=begin+text.size();

  // Skip whole lines until the wanted paragraph (or the end of the text).
  for (;cur!=para && begin<end;++cur) {
    while (begin<end && *begin!=L'\n')
      ++begin;
    if (begin<end) // step over the newline itself
      ++begin;
  }

  // The paragraph runs up to the next newline or the end of the text.
  const wchar_t *eol=begin;
  while (eol<end && *eol!=L'\n')
    ++eol;
  return Buffer<wchar_t>(begin,eol-begin);
}
// Build a Paragraph for paragraph `para`.
//
// docid - not used by this function.
// para  - paragraph number.
//
// Leading spaces are replaced with 0xA0 (no-break space). For the first
// paragraph of a word, characters up to the first run of two spaces are
// flagged bold.
Paragraph DictParser::GetParagraph(int docid,int para) {
  bool first_of_word;
  Buffer<wchar_t> text(GetParagraphImp(para,first_of_word));
  Paragraph result(text.size());
  result.str=text;

  // replace leading spaces with nbsp
  int lead=0;
  while (lead<result.str.size() && result.str[lead]==L' ') {
    result.str[lead]=0xA0;
    ++lead;
  }

  result.cflags.Zero();
  if (first_of_word) {
    // NOTE(review): the bound stops one short of the end, so the last
    // character is never flagged; if size() is unsigned, an empty paragraph
    // would wrap here - confirm against Buffer.
    int pos=0;
    while (pos<result.str.size()-1) {
      if (result.str[pos]==L' ' && result.str[pos+1]==L' ')
        break; // two spaces end the bold part
      result.cflags[pos].bold=1;
      ++pos;
    }
  }
  return result;
}
int DictParser::GetPLength(int docid,int para) {
bool isfirst;
return GetParagraphImp(para,isfirst).size();
}
// Open the dictionary at `path` and wrap it in a new DictParser.
// Returns NULL when IDict::Create fails; `errmsg` is passed through to it.
DictParser *DictParser::OpenDict(const CString& path,CString *errmsg) {
  IDict *dict=IDict::Create(path,errmsg);
  if (dict)
    return new DictParser(kilo::auto_ptr<IDict>(dict));
  return NULL;
}
// Resolve the reference `rname` to a position in the dictionary.
// On success fills `dest` with document 0, the first paragraph of the word
// that was found, and offset 0, and returns true. Returns false and leaves
// `dest` untouched when the lookup fails.
bool DictParser::LookupReference(const wchar_t *rname,FilePos& dest) {
  int word_index,match_kind;
  if (!m_dict->Find(rname,word_index,match_kind))
    return false;
  dest.docid=0;
  dest.para=m_dict->GetStartPofWord(word_index);
  dest.off=0;
  return true;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -