📄 dictionary.cpp

📁 俄罗斯人开发的大名鼎鼎的Pocket Pc 阅读器haaliread的源代码,visual c
💻 CPP
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*
 * Copyright (c) 2001,2002,2003 Mike Matsnev.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    Mike Matsnev.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * 
 * $Id: Dictionary.cpp,v 1.23.2.3 2004/07/07 12:04:46 mike Exp $
 * 
 */

#include <afx.h>
#include <afxtempl.h>

#include "ptr.h"
#include "zlib.h"
#include "Unicode.h"
#include "RFile.h"
#include "TextParser.h"
#include "Dictionary.h"

#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif

/*
 * Dictionary reader backed by an RFile.
 *
 * The file is organised as a table of blocks; each block holds a run of
 * consecutive entries and may be stored zlib-compressed.  Only one block
 * is kept decoded in memory at a time (see m_curblk / m_buffer).
 */
class Dict : public IDict
{
public:
  // Stores fp in m_rf and parses the header via OpenNew(); the outcome
  // is reported by Valid().
  Dict(RFile *fp);
  ~Dict() { }

  // Total number of entries, as read from the file header.
  int	  NumWords() { return m_numwords; }
  // Text of entry `index` converted with m_codepage; an empty buffer is
  // returned when the entry cannot be fetched.
  Buffer<wchar_t> GetWordW(int index);
  // Find() is defined outside this excerpt.
  bool	  Find(const wchar_t *word,int& index,int& found);
  // Block-level search on the sort key of `word`; `found` may be NULL.
  bool	  FindImp(const wchar_t *word,int& index,int *found);
  // True when the constructor parsed the file successfully.
  bool	  Valid() { return m_ok; }
  // Paragraph <-> word mapping; both are defined outside this excerpt.
  int	  GetStartPofWord(int index);
  int	  GetWordFromP(int para);
  // Total paragraph count accumulated over all blocks by OpenNew().
  int	  GetNumP() { return m_numpara; }
protected:
  // One entry of the block table, filled in by OpenNew().
  struct Block {
    Buffer<char>  key;	    /* key */
    int		wordidx;    /* first word index */
    int		numwords;   /* number of words in this block */
    int		size;	    /* uncompressed block size */
    int		csize;	    /* compressed size */
    int		off;	    /* offset in the file */
    int		numpara;    /* number of paragraphs in this block */
    int		paraidx;    /* starting paragraph */
  };

  // NOTE: the declaration order below is also the member initialisation
  // order; keep constructor initialiser lists in sync with it.
  kilo::auto_ptr<RFile>	m_rf;       // underlying file
  DWORD			m_numblk;   // number of entries in m_blocks
  DWORD			m_numwords; // total entries in the dictionary
  int			m_curblk;   // block currently decoded in m_buffer, -1 if none
  // The three index arrays are rebuilt by GetBlk() and point into m_buffer.
  CPtrArray		m_windex; // words in current block
  CPtrArray		m_kindex; // keys
  CUIntArray		m_pindex; // paragraphs
  Buffer<char>		m_buffer;   // decoded data of the current block
  CArray<Block,Block&>	m_blocks;   // block table
  bool			m_ok;       // result of the constructor's parse
  LCID			m_lcid;     // locale from the header (or the user default)
  UINT			m_ms_codepage; // code page value as stored in the header
  int			m_codepage; // internal code page id derived from m_ms_codepage
  int			m_numpara;  // total paragraphs over all blocks

  // Loads and indexes block `num`; false on I/O, zlib or format errors.
  bool	      GetBlk(int num);
  // Raw (code-page encoded) text of entry `index`, or NULL on failure.
  const char  *GetWordImp(int index);
  // OpenOld() is declared here but not defined in this excerpt.
  bool	      OpenOld();
  // Parses the header and block table; see the definition.
  bool	      OpenNew();
};

/*
 * Read a 32-bit little-endian value from fp.
 *
 * Returns 0 when fewer than four bytes could be read, so callers cannot
 * tell a short read apart from a stored zero (they all treat 0 as invalid).
 */
static DWORD    getdword(RFile *fp) {
  BYTE	  raw[4];
  int	  got=fp->read2(raw,4);
  if (got!=4)
    return 0;
  // fold from the most significant byte (last in the file) downwards
  DWORD	  value=0;
  for (int pos=3;pos>=0;--pos)
    value=(value<<8)|raw[pos];
  return value;
}

bool Dict::OpenNew() {
  if ((m_lcid=getdword(m_rf.get()))==0)
    return false;
  if (!IsValidLocale(m_lcid,LCID_INSTALLED))
    m_lcid=GetUserDefaultLCID();
  if ((m_numwords=getdword(m_rf.get()))==0)
    return false;
  if ((m_ms_codepage=getdword(m_rf.get()))==0)
    return false;
  if ((m_codepage=Unicode::GetIntCodePage(m_ms_codepage))<0)
    m_codepage=Unicode::DefaultCodePage();
  DWORD	btab=getdword(m_rf.get());
  if (btab==0)
    return false;
  m_rf->seek(btab);
  if ((m_numblk=getdword(m_rf.get()))==0 || m_numblk>m_numwords)
    return false;
  DWORD idx,i,off,maxblock,paraidx;
  for (i=idx=paraidx=maxblock=0,off=20;i<(int)m_numblk;++i) {
    Block   blk;
    int	    keylen;
    if ((blk.size=getdword(m_rf.get()))==0)
      return false;
    if ((blk.csize=getdword(m_rf.get()))==0)
      return false;
    if ((keylen=getdword(m_rf.get()))==0)
      return false;
    if ((blk.numwords=getdword(m_rf.get()))==0)
      return false;
    if ((blk.numpara=getdword(m_rf.get()))==0)
      return false;
    blk.numpara+=blk.numwords; // append and empty line after each word
    blk.wordidx=idx;
    blk.paraidx=paraidx;
    blk.off=off;
    idx+=blk.numwords;
    paraidx+=blk.numpara;
    off+=blk.csize;
    blk.key=Buffer<char>(keylen);
    m_blocks.Add(blk);
    if (blk.size>(int)maxblock)
      maxblock=blk.size;
  }
  if (idx!=m_numwords)
    return false;
  m_numpara=paraidx;
  for (i=0;i<(int)m_numblk;++i)
    if ((m_rf->read2(m_blocks[i].key,m_blocks[i].key.size()))!=(DWORD)m_blocks[i].key.size())
      return false;
  m_buffer=Buffer<char>(maxblock);
  return true;
}

/*
 * Wrap fp and try to parse it as a dictionary.
 *
 * fp is handed to m_rf (a kilo::auto_ptr; presumably it takes ownership --
 * confirm against ptr.h).  Valid() reports whether parsing succeeded.
 *
 * When parsing fails the object is reset to an empty dictionary: besides
 * dropping the buffer and the block table, the word/block/paragraph
 * counters are zeroed so that they never describe blocks that are no
 * longer in m_blocks.
 *
 * The initialiser list follows the member declaration order, which is the
 * order the members are actually initialised in.
 */
Dict::Dict(RFile *fp) :
  m_rf(fp), m_numblk(0), m_numwords(0), m_curblk(-1), m_ok(false),
  m_lcid(0), m_ms_codepage(0), m_codepage(0), m_numpara(0)
{
  if (OpenNew()) {
    m_ok=true;
    return;
  }
  // OpenNew() may have filled things in partially before giving up
  m_buffer=Buffer<char>();
  m_blocks.RemoveAll();
  m_numblk=0;
  m_numwords=0;
  m_numpara=0;
}

bool  Dict::GetBlk(int num) {
  if (num<0 || num>=(int)m_numblk)
    return false;
  if (m_curblk==num)
    return true;
  m_curblk=-1;
  m_rf->seek(m_blocks[num].off);
  if (m_blocks[num].size==m_blocks[num].csize) { // uncompressed
    if (m_blocks[num].size!=(int)m_rf->read2(m_buffer,m_blocks[num].size))
      return false;
  } else {
    Buffer<unsigned char>    in(m_blocks[num].csize);
    if (m_blocks[num].csize!=(int)m_rf->read2(in,m_blocks[num].csize))
      return false;
    uLongf  len=m_blocks[num].size;
    int ret=uncompress((unsigned char *)(char *)m_buffer,&len,
      in,m_blocks[num].csize);
    if (ret!=Z_OK || (int)len!=m_blocks[num].size)
      return false;
  }
  char	  *p=m_buffer;
  char	  *e=p+m_blocks[num].size;
  int	  i,pnum;
  m_windex.SetSize(m_blocks[num].numwords);
  m_kindex.SetSize(m_blocks[num].numwords);
  m_pindex.SetSize(m_blocks[num].numwords+1);
  for (i=pnum=0;i<m_blocks[num].numwords && p<e;++i) {
    m_kindex[i]=p;
    m_pindex[i]=pnum;
    while (p<e && *p)
      ++p;
    if (p<e)
      ++p;
    m_windex[i]=p;
    while (p<e && *p) {
      if (*p=='\n')
	++pnum;
      ++p;
    }
    if (p<e)
      ++p;
    pnum+=2; // implicit empty line after each word
  }
  if (i!=m_blocks[num].numwords || pnum!=m_blocks[num].numpara)
    return false;
  m_curblk=num;
  m_pindex[m_blocks[num].numwords]=m_blocks[num].numpara;
  return true;
}

/*
 * Entry text for `index` as wide characters, converted from the
 * dictionary's code page.  Yields an empty buffer when the entry cannot
 * be fetched.
 */
Buffer<wchar_t> Dict::GetWordW(int index) {
  const char  *text=GetWordImp(index);
  if (text)
    return Unicode::ToWCbuf(m_codepage,text,strlen(text));
  return Buffer<wchar_t>();
}

/*
 * Entry text for `index` as a CString, with every "\n" expanded to
 * "\r\n".
 */
CString	  IDict::GetWord(int index) {
  CString	    text(Unicode::ToCS(GetWordW(index)));
  // stored text uses bare LF as the line separator
  text.Replace(_T("\n"),_T("\r\n"));
  return text;
}

/*
 * Pointer to the raw text of entry `index` inside the current block
 * buffer, loading the owning block first when necessary.
 *
 * Returns NULL when the index is out of range, no block covers it, or the
 * block cannot be loaded.  The pointer refers to m_buffer and therefore
 * stops being valid once another block is loaded.
 */
const char *Dict::GetWordImp(int index) {
  if (index<0 || index>=(int)m_numwords)
    return NULL;

  // is the entry inside the block that is already decoded?
  bool	incurrent=m_curblk>=0 &&
      index>=m_blocks[m_curblk].wordidx &&
      index<m_blocks[m_curblk].wordidx+m_blocks[m_curblk].numwords;

  if (!incurrent) {
    // binary search of the block table by word-index range
    int	lo=0;
    int	hi=m_numblk-1;
    int	probe;
    int	steps=0;
    for (;;) {
      // prevent loops on unsorted invalid data
      if (steps>(int)m_numblk || lo>hi)
	return NULL;
      probe=(lo+hi)>>1;
      const int	first=m_blocks[probe].wordidx;
      const int	count=m_blocks[probe].numwords;
      if (index<first)
	hi=probe-1;
      else if (index>=first+count)
	lo=probe+1;
      else
	break;
      ++steps;
    }
    if (!GetBlk(probe))
      return NULL;
  }
  return (const char *)m_windex[index-m_blocks[m_curblk].wordidx];
}

/*
 * Three-way byte comparison of buffer b1 against the l2 bytes at b2.
 * A negative l2 means "b2 is NUL-terminated, use strlen".
 *
 * The shared prefix is compared with memcmp; when it is equal the shorter
 * operand orders first.  Returns <0, 0 or >0.
 */
static int    compare_buf_str(Buffer<char>& b1,const char *b2,int l2=-1) {
  if (l2<0)
    l2=strlen(b2);
  int	order=memcmp(b1,b2,min(b1.size(),l2));
  if (order!=0)
    return order;
  // equal prefix: decide on length
  if (b1.size()<l2)
    return -1;
  if (b1.size()>l2)
    return 1;
  return 0;
}

/*
 * Length of the common prefix of buffer b1 and the l2 bytes at b2.
 * A negative l2 means "b2 is NUL-terminated, use strlen"; at most
 * b1.size() bytes are examined.
 *
 * Returns the number of leading bytes that are equal in both operands.
 * The count stops at the first differing byte and does not include it,
 * so "k bytes equal, then a mismatch" and "k+1 bytes equal" give
 * different results.
 */
static int    compare_buf_str_len(Buffer<char>& b1,const char *b2,int l2=-1) {
  if (l2<0)
    l2=strlen(b2);
  if (l2>b1.size())
    l2=b1.size();
  const char *p=b1;
  int	matched=0;
  // advance only past bytes that actually compare equal
  while (matched<l2 && p[matched]==b2[matched])
    ++matched;
  return matched;
}

/*
 * Three-way byte comparison of two buffers, using b2's full size as the
 * right-hand length (see compare_buf_str for the ordering rules).
 */
static inline int    compare_bufs(Buffer<char>& b1,Buffer<char>& b2) {
  const char  *rhs=b2;
  return compare_buf_str(b1,rhs,b2.size());
}

bool	Dict::FindImp(const wchar_t *word,int& index,int *found) {
  Buffer<char>	  sortkey(Unicode::SortKey(m_lcid,word));
  int	  low=0;
  int	  high=m_numblk-1;
  int	  mid;
  for (int ni=0;;++ni) {
    if (ni>(int)m_numblk) // prevent loops on unsorted data
      return false;
    if (low>high) {
      if (low==0) {
	index=0;
	if (found) {
	  *found=0;
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -