📄 dictionary.cpp

📁 俄罗斯人开发的大名鼎鼎的 Pocket PC 阅读器 Haali Reader 的源代码,Visual C++
💻 CPP
📖 第 1 页 / 共 2 页
字号:
上一页 12
	  return true;
	}
      }
      return false;
    }
    mid=(low+high)>>1;
    int cmp=compare_bufs(sortkey,m_blocks[mid].key);
    if (cmp<0)
      high=mid-1;
    else {
      if (mid==(int)m_numblk-1) // last block, stop search
	break;
      cmp=compare_bufs(sortkey,m_blocks[mid+1].key);
      if (cmp<0) // found it
	break;
      low=mid+1;
    }
  }
  int	blk=mid;
  if (!GetBlk(blk))
    return false;
  low=0;
  high=m_blocks[blk].numwords-1;
  for (int nj=0;;++nj) {
    if (nj>m_blocks[blk].numwords) // prevent loops on invalid data
      return false;
    if (low>high) { // no such word in this block
      if (!found) // shortcut
	return false;
      int   idx=m_blocks[blk].wordidx+high;
      if (idx<0)
	idx=0;
      if (idx<(int)m_numwords-1) {
	if (!GetWordImp(idx))
	  return false;
	int cur=idx-m_blocks[m_curblk].wordidx;
	int l1=compare_buf_str_len(sortkey,(const char *)m_kindex[cur]);
	if (++cur>=m_blocks[m_curblk].numwords) {
	  if (!GetWordImp(idx+1))
	    return false;
	  cur=0;
	}
	if (compare_buf_str_len(sortkey,(const char *)m_kindex[cur])>l1)
	  ++idx;
      }
      index=idx;
      *found=2;
      return true;
    }
    mid=(low+high)>>1;
    int cmp=compare_buf_str(sortkey,(const char *)m_kindex[mid]);
    if (cmp<0)
      high=mid-1;
    else if (cmp>0)
      low=mid+1;
    else { // found
      index=m_blocks[blk].wordidx+mid;
      if (found)
	*found=1;
      return true;
    }
  }
}

// English suffix rules consulted by Dict::Find.
//   pattern     - the suffix spelled backwards, because it is compared against
//                 the word starting from its last character. A '.' matches
//                 when the two characters in front of the suffix are equal
//                 (a doubled letter) and ends the pattern.
//   replacement - text appended once the suffix has been stripped: NULL
//                 appends nothing, L"1" puts back a single copy of the
//                 doubled letter, anything else is appended verbatim.
// A NULL pattern terminates the table.
static struct {
  const wchar_t	  *pattern;
  const wchar_t	  *replacement;
} endings[]={
  { L"s",	NULL  },
  { L"se",	NULL  },
  { L"sei",	L"y"  },
  { L"de",	NULL  },
  { L"de",	L"e"  },
  { L"dei",	L"y"  },
  { L"de.",	L"1"  },
  { L"gni",	NULL  },
  { L"gni",	L"e"  },
  { L"gniy",	L"ie" },
  { L"gni.",	L"1"  },
  { L"re",	NULL  },
  { L"re",	L"e"  },
  { L"rei",	L"y"  },
  // the comma between the two literals matters: without it they concatenate
  // into the pattern L"re.1" and the replacement silently becomes NULL
  { L"re.",	L"1"  },
  { L"tse",	NULL  },
  { L"tse",	L"e"  },
  { L"tsei",	L"y"  },
  { L"tse.",	L"1"  },
  { NULL,	NULL  }
};

bool  Dict::Find(const wchar_t *word,int& index,int& found) {
  if (PRIMARYLANGID(m_lcid)!=LANG_ENGLISH)
    return FindImp(word,index,&found);
  // handle english word endings
  wchar_t	tmpbuf[64];
  int		wordlen=wcslen(word);
  if (wordlen>=sizeof(tmpbuf)/sizeof(tmpbuf[0]))
    return FindImp(word,index,&found);
  // check exact match
  if (FindImp(word,index,NULL)) {
    found=1;
    return true;
  }
  for (int ending=0;endings[ending].pattern;++ending) {
    const wchar_t   *pattern=endings[ending].pattern;
    for (int i=0;i<wordlen;++i) {
      if (pattern[i]==L'.') { // doubled char here
	if (i+1<wordlen && word[wordlen-i-1]==word[wordlen-i-2]) {
	  i+=2;
	  goto match;
	}
	break;
      }
      if (!pattern[i]) { // end of pattern, matched
match:
	int   k;
	for (k=0;k<wordlen-i;++k)
	  tmpbuf[k]=word[k];
	const wchar_t	*replacement=endings[ending].replacement;
	if (replacement) {
	  if (replacement[0]==L'1')
	    tmpbuf[k++]=word[wordlen-i];
	  else
	    while (*replacement)
	      tmpbuf[k++]=*replacement++;
	}
	tmpbuf[k++]=L'\0';
	// relookup
	if (FindImp(tmpbuf,index,NULL)) {
	  found=1;
	  return true;
	}
	break;
      }
      if (pattern[i]!=word[wordlen-i-1])
	break;
    }
  }
  // still no match, proceed as usual
  return FindImp(word,index,&found);
}

// Map a paragraph number to the index of the word (entry) containing it.
// Returns 0 when para is out of range, when no block covers it, when the
// block cannot be loaded, or when a search exceeds its iteration budget.
int   Dict::GetWordFromP(int para) {
  if (para<0 || para>=m_numpara)
    return 0;

  // stage 1: binary search for the block whose paragraph range holds para
  int	lo=0;
  int	hi=m_numblk-1;
  int	probe;
  bool	located=false;
  // the step counter bounds the search in case the block table is not sorted
  for (int steps=0;steps<=(int)m_numblk && lo<=hi;++steps) {
    probe=(lo+hi)>>1;
    if (para<m_blocks[probe].paraidx)
      hi=probe-1;
    else if (para>=m_blocks[probe].paraidx+m_blocks[probe].numpara)
      lo=probe+1;
    else {
      located=true;
      break;
    }
  }
  if (!located)
    return 0;
  if (!GetBlk(probe))
    return 0;

  // stage 2: binary search inside the loaded block, using offsets that are
  // relative to the block's first paragraph
  // NOTE(review): the upper bound is numwords, not numwords-1, and the loop
  // reads m_pindex[probe+1]; presumably m_pindex carries a trailing sentinel
  // entry - confirm against the code that fills it.
  lo=0;
  hi=m_blocks[m_curblk].numwords;
  para-=m_blocks[m_curblk].paraidx;
  located=false;
  for (int steps=0;steps<=m_blocks[m_curblk].numwords && lo<=hi;++steps) {
    probe=(lo+hi)>>1;
    if (para<(int)m_pindex[probe])
      hi=probe-1;
    else if (para>=(int)m_pindex[probe+1])
      lo=probe+1;
    else {
      located=true;
      break;
    }
  }
  if (!located)
    return 0;
  return m_blocks[m_curblk].wordidx+probe;
}

// Return the number of the first paragraph of the word at the given index,
// or 0 when GetWordImp fails for that index.
int   Dict::GetStartPofWord(int index) {
  if (!GetWordImp(index))
    return 0;
  // block's first paragraph plus the word's paragraph offset within the block
  return m_blocks[m_curblk].paraidx+m_pindex[index-m_blocks[m_curblk].wordidx];
}

// Registers the "DICq" signature together with a factory that builds a Dict;
// the registration happens when the file-scope instance is constructed.
static class SimpleDictInit: public IDict::DictInit {
  // factory handed to the DictInit base class
  static IDict *make_dict(RFile *file) {
    return new Dict(file);
  }
public:
  SimpleDictInit() : DictInit("DICq",make_dict) { }
} g_dict_init;

// Open a dictionary file and create the dictionary object that matches its
// signature.
//   filename - path of the dictionary file
//   errmsg   - optional; when non-NULL it receives a description of the
//              failure
// The first SIGSIZE bytes of the file are compared with the signature of
// every registered DictInit; the first match supplies the factory, which is
// handed the open file.
// Returns the new dictionary, or NULL when the file cannot be opened, is too
// short, has no registered signature, or the created object reports !Valid().
IDict *IDict::Create(const CString& filename,CString *errmsg) {
  kilo::auto_ptr<RFile>	  fp(new RFile(filename));

  if (!fp->Reopen()) {
    if (errmsg)
      *errmsg=FileExceptionInfo(filename,GetLastError());
    return NULL;
  }

  IDict	  *d=NULL;
  BYTE	  sig[SIGSIZE];
  if (fp->read2(sig,SIGSIZE)==SIGSIZE) {
    for (DictInit *di=DictInit::m_head;di;di=di->m_next)
      if (memcmp(sig,di->m_sig,SIGSIZE)==0) {
	// the file is released from fp and passed to the factory
	d=di->m_create(fp.release());
	break;
      }
  }
  if (d && d->Valid())
    return d;

  // single failure path: short file, unknown signature or invalid contents
  delete d;
  if (errmsg)
    errmsg->Format(_T("%s: Invalid dictionary file"),(const TCHAR*)filename);
  return NULL;
}

// Head of the singly linked list of registered dictionary formats; every
// DictInit constructor pushes its object onto the front of this list.
IDict::DictInit	  *IDict::DictInit::m_head;

// Record a format signature with its factory function and push this object
// onto the front of the m_head list.
//   sig    - signature string; at most sizeof(m_sig) characters are copied
//   create - factory that builds the dictionary from an RFile
IDict::DictInit::DictInit(const char *sig,IDict *(*create)(RFile *)) {
  // strncpy leaves m_sig without a terminating NUL when sig fills the buffer
  strncpy(m_sig,sig,sizeof(m_sig));
  m_create=create;

  // link in front of the previously registered formats
  m_next=m_head;
  m_head=this;
}

// Extract the text of one paragraph from the dictionary entry containing it.
//   para    - paragraph number
//   isfirst - set to true when para is the first paragraph of its entry
// The entry text is treated as '\n'-separated lines; the returned buffer
// holds the requested line without the line break.
Buffer<wchar_t>	DictParser::GetParagraphImp(int para,bool& isfirst) {
  int		    wordno=m_dict->GetWordFromP(para);
  Buffer<wchar_t>   entry(m_dict->GetWordW(wordno));
  int		    curpara=m_dict->GetStartPofWord(wordno);

  isfirst=(curpara==para);

  const wchar_t	    *cur=entry;
  const wchar_t	    *end=cur+entry.size();

  // skip whole lines until the requested paragraph or the end of the text
  for (;curpara!=para && cur<end;++curpara) {
    while (cur<end && *cur!=L'\n')
      ++cur;
    if (cur<end) // step over the line break itself
      ++cur;
  }

  // the paragraph runs up to the next line break or the end of the text
  const wchar_t	    *stop=cur;
  while (stop<end && *stop!=L'\n')
    ++stop;
  return Buffer<wchar_t>(cur,stop-cur);
}

// Build a Paragraph for the given paragraph number; docid is not used.
// Leading spaces are turned into non-breaking spaces, and in the first
// paragraph of an entry the characters up to the first pair of consecutive
// spaces are marked bold.
Paragraph DictParser::GetParagraph(int docid,int para) {
  bool		    headline;
  Buffer<wchar_t>   text(GetParagraphImp(para,headline));
  Paragraph	    result(text.size());
  result.str=text;

  // replace leading spaces with nbsp
  int		    lead=0;
  while (lead<result.str.size() && result.str[lead]==L' ') {
    result.str[lead]=0xA0;
    ++lead;
  }

  result.cflags.Zero();
  if (!headline)
    return result;

  // NOTE(review): if size() returns an unsigned type, size()-1 wraps around
  // for an empty paragraph - confirm the return type of Buffer::size().
  for (int pos=0;pos<result.str.size()-1;++pos) {
    if (result.str[pos]==L' ' && result.str[pos+1]==L' ')
      break; // a double space ends the bold run
    result.cflags[pos].bold=1;
  }
  return result;
}

int   DictParser::GetPLength(int docid,int para) {
  bool isfirst;
  return GetParagraphImp(para,isfirst).size();
}

// Open the dictionary at path and wrap it in a new DictParser.
// Returns NULL when IDict::Create fails; errmsg is passed on to it unchanged.
DictParser  *DictParser::OpenDict(const CString& path,CString *errmsg) {
  IDict	  *dict=IDict::Create(path,errmsg);
  if (dict==NULL)
    return NULL;
  // the dictionary is handed to the parser through an auto_ptr
  kilo::auto_ptr<IDict>	  owner(dict);
  return new DictParser(owner);
}

// Resolve a reference name to a position in the dictionary.
//   rname - word to look up
//   dest  - on success: document 0, first paragraph of the entry, offset 0
// Returns false and leaves dest untouched when the lookup fails. The match
// kind reported by Find is not examined.
bool  DictParser::LookupReference(const wchar_t *rname,FilePos& dest) {
  int	  wordidx,matchkind;
  if (!m_dict->Find(rname,wordidx,matchkind))
    return false;

  dest.docid=0;
  dest.para=m_dict->GetStartPofWord(wordidx);
  dest.off=0;
  return true;
}
上一页 12
💿 文件大小 799 K
👤 上传用户 mislrb
📂 所属分类 Windows Mobile
🏷️ 相关标签

#haaliread #Pocket #visual #Pc
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -