⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mkdict.c

📁 俄罗斯人开发的大名鼎鼎的Pocket Pc 阅读器haaliread的源代码,visual c
💻 C
📖 第 1 页 / 共 2 页
字号:
  _tprintf(_T("Merging... ")); fflush(stdout);
  nwords=xmalloc(curword*sizeof(struct word));
  for (cur=nwp=merged=0;cur<curword;)
    if (cur<curword-1 && !strcmp(words[cur].key,words[cur+1].key)) {
      totlen=words[cur].elen+words[cur+1].elen+1;
      for (end=cur+2;end<curword&&!strcmp(words[cur].key,words[end].key);++end)
      {
	totlen+=words[end].elen;
	totlen++;
      }
      merged+=end-cur-1;
      ne=xmalloc(totlen+1);
      memcpy(ne,words[cur].entry,words[cur].elen);
      nwords[nwp].key=words[cur].key;
      nwords[nwp].klen=words[cur].klen;
      nwords[nwp].elen=totlen;
      nwords[nwp].entry=ne;
      ++nwp;
      cp=ne+words[cur].elen;
      free((void*)words[cur].entry);
      ++cur;
      while (cur<end) {
	*cp++='\n';
	memcpy(cp,words[cur].entry,words[cur].elen);
	cp+=words[cur].elen;
	free((void*)words[cur].entry);
	free((void*)words[cur].key);
	++cur;
      }
      *cp='\0';
    } else {
      nwords[nwp].entry=words[cur].entry;
      nwords[nwp].elen=words[cur].elen;
      nwords[nwp].key=words[cur].key;
      nwords[nwp].klen=words[cur].klen;
      ++nwp;
      ++cur;
    }
  free(words);
  words=nwords;
  maxword=curword=nwp;
  _tprintf(_T("done (%d merged).\n"),merged);
}

// blocks
// One record of the block table; filled in by putblock() for every block
// written to the dictionary file and dumped by writewords().
struct block {
  int	      size;            // uncompressed length of the block in bytes
  int	      compressed_size; // bytes actually written to the file (equals size when stored raw)
  char	      *key;            // heap copy of the NUL-terminated string at the start of the block
  int	      keylen;          // strlen() of key, terminator not counted
  int	      nent;            // number of entries the caller put into the block
  int         npara;           // nent plus the number of '\n' bytes found in the entry texts
};
struct block	*blocks;       // block table; slot curblock is prepared via CHECKADD in putblock()
int		curblock,maxblock; // curblock: records in use; maxblock: presumably the allocated capacity (managed by CHECKADD) - confirm

void  putblock(const char *block,int blen,int nent,FILE *fp) {
  char	  compressed[BLOCK];
  int     res;
  uLongf  dsize=sizeof(compressed);
  int	  docomp=0;
  const char *bend;

  CHECKADD(blocks,curblock,maxblock);
  res=compress2((Bytef *)compressed,&dsize,(Bytef *)block,blen,9);
  if (res==Z_MEM_ERROR) // don't want to compress this
    fwrite(block,blen,1,fp);
  else if (res==Z_OK) {
    if (dsize>=(unsigned)blen) // write uncompressed
      fwrite(block,blen,1,fp);
    else {
      docomp=1;
      fwrite(compressed,dsize,1,fp);
    }
  } else {
    _ftprintf(stderr,_T("%s: zlib error: %d\n"),progname,res);
    exit(1);
  }
  blocks[curblock].size=blen;
  blocks[curblock].compressed_size=docomp ? dsize : blen;
  blocks[curblock].keylen=strlen(block);
  blocks[curblock].key=xmalloc(blocks[curblock].keylen+1);
  memcpy(blocks[curblock].key,block,blocks[curblock].keylen+1);
  blocks[curblock].nent=nent;
  for (bend=block+blen;block<bend;) {
    while (block<bend && *block)
      ++block;
    if (block<bend)
      ++block;
    while (block<bend && *block) {
      if (*block=='\n')
	++nent;
      ++block;
    }
    if (block<bend)
      ++block;
  }
  blocks[curblock].npara=nent;
  ++curblock;
}

// Write a 16-bit value to fp, low byte first.
void  putword(WORD w,FILE *fp) {
  int	lo=w&0xff;
  int	hi=w>>8;

  putc(lo,fp);
  putc(hi,fp);
}

// Write a 32-bit value to fp as four bytes, least significant byte first.
void  putdword(DWORD d,FILE *fp) {
  int	shift;

  for (shift=0;shift<32;shift+=8)
    putc((int)((d>>shift)&0xff),fp);
}

DWORD  getdword(FILE *fp) {
  BYTE	b1,b2,b3,b4;
  b1=getc(fp); b2=getc(fp); b3=getc(fp); b4=getc(fp);
  return (DWORD)b4<<24|(DWORD)b3<<16|(DWORD)b2<<8|b1;
}

// Read two bytes from fp and assemble them into a 16-bit value, low byte
// first. End-of-file is not detected here.
WORD  getword(FILE *fp) {
  BYTE	lo=getc(fp);
  BYTE	hi=getc(fp);

  return (WORD)(hi<<8|lo);
}

/*
 * Write the global words[] array to a dictionary file.
 *
 * File layout produced here:
 *   "DICq", key locale, total entry count, encoding, block table offset
 *   (all 32-bit little endian, the offset is patched in at the end),
 *   then the data blocks, then the block table followed by the block keys.
 *
 * Entries are packed into blocks of at most BLOCK bytes, each entry stored
 * as key NUL text NUL; every full block is handed to putblock().
 *
 * filename - path of the file to create
 *
 * Terminates the program if a single entry cannot fit into one block.
 * Open and write failures are reported through liberr(), which is expected
 * not to return (the code after the fopen() check already relies on that).
 */
void  writewords(const char *filename) {
  FILE	  *fp;
  int	  i;
  char	  block[BLOCK];
  int	  blen,nent;
  long	  tboff,size;
  int	  failed;

  _tprintf(_T("Writing... ")); fflush(stdout);
  if ((fp=fopen(filename,"wb"))==NULL)
    liberr(_T("Can't open file"));
  setvbuf(fp,NULL,_IOFBF,65536);
  // write header
  fputs("DICq",fp);
  putdword(src_lcid,fp); // key locale
  putdword(curword,fp); // total number of entries
  putdword(dest_cp,fp); // encoding
  putdword(0,fp); // block table offset, patched below
  // write blocks
  for (i=blen=nent=0;i<curword;++i) {
    if (words[i].klen+words[i].elen+2>BLOCK) {
      _ftprintf(stderr,_T("%s: Entry too long.\n"),progname);
      exit(1);
    }
    if (blen+words[i].klen+1+words[i].elen+1>BLOCK) { // flush block
      putblock(block,blen,nent,fp);
      blen=nent=0;
      if ((curblock&7)==0) { // progress mark every 8 blocks
	putc('.',stdout);
	fflush(stdout);
      }
    }
    // key and entry are both copied together with their terminating NUL
    memcpy(block+blen,words[i].key,words[i].klen+1);
    blen+=words[i].klen+1;
    memcpy(block+blen,words[i].entry,words[i].elen+1);
    blen+=words[i].elen+1;
    ++nent;
  }
  if (nent>0)
    putblock(block,blen,nent,fp);
  tboff=ftell(fp);
  // write block table
  putdword(curblock,fp); // number of blocks
  for (i=0;i<curblock;++i) {
    putdword(blocks[i].size,fp); // uncompressed size
    putdword(blocks[i].compressed_size,fp); // compressed size
    putdword(blocks[i].keylen,fp); // key length
    putdword(blocks[i].nent,fp); // number of words in this block
    putdword(blocks[i].npara,fp); // number of "paragraphs" in this block
  }
  for (i=0;i<curblock;++i) // keys
    fwrite(blocks[i].key,blocks[i].keylen,1,fp);
  fflush(fp);
  size=ftell(fp);
  failed=(tboff<0 || size<0);
  // write block table offset into the header slot at byte 16
  if (fseek(fp,16,SEEK_SET)!=0)
    failed=1;
  else
    putdword(tboff,fp);
  // a sticky stream error or a failed final flush means the file is unusable
  if (ferror(fp))
    failed=1;
  if (fclose(fp)!=0)
    failed=1;
  if (failed)
    liberr(_T("Can't write file"));
  _tprintf(_T(" done (%d block(s), %ld byte(s)).\n"),curblock,size);
}

// convert string and write
/*
 * Write len bytes of s to fp as a single text line, converting from code
 * page srccp to code page dstcp.
 *
 * Every '\n' in the text is replaced by '\t' and one '\n' is appended.
 * When the code pages are equal, or the string is empty, no conversion is
 * attempted. A failed length query is reported through syserror().
 */
void  write_str(const char *s,int len,UINT srccp,UINT dstcp,FILE *fp) {
  wchar_t   *wide;
  char	    *out;
  char	    *p;
  int	    nwide,nout;
  int	    k;

  if (srccp==dstcp || len==0) {
    // nothing to convert: pass the bytes through (none at all when len==0)
    for (k=0;k<len;++k)
      putc(s[k]=='\n' ? '\t' : s[k],fp);
    putc('\n',fp);
    return;
  }

  // source code page -> UTF-16: size query first, then the conversion
  nwide=MultiByteToWideChar(srccp,0,s,len,NULL,0);
  if (!nwide)
    syserror(_T("Can't convert string"));
  wide=xmalloc(nwide*sizeof *wide);
  MultiByteToWideChar(srccp,0,s,len,wide,nwide);

  // UTF-16 -> destination code page, same two-step scheme
  nout=WideCharToMultiByte(dstcp,0,wide,nwide,NULL,0,NULL,NULL);
  if (!nout)
    syserror(_T("Can't convert string"));
  out=xmalloc(nout);
  WideCharToMultiByte(dstcp,0,wide,nwide,out,nout,NULL,NULL);

  // keep the output on one line
  for (p=out;p<out+nout;++p)
    if (*p=='\n')
      *p='\t';
  fwrite(out,nout,1,fp);
  putc('\n',fp);

  free(out);
  free(wide);
}

// find code page names
/*
 * Fill in src_codepage_num / dest_codepage_num when they are still negative:
 * GetCPInfoEx() is asked about src_cp / dest_cp and the CodePage it reports
 * is looked up in codepages[]. An index is left untouched when the query
 * fails or no table row matches.
 */
void  get_cp_name(void) {
  CPINFOEX  info;
  int       idx;

  if (src_codepage_num<0 && GetCPInfoEx(src_cp,0,&info)) {
    idx=0;
    while (idx<curcodepage && codepages[idx].cp!=info.CodePage)
      ++idx;
    if (idx<curcodepage)
      src_codepage_num=idx;
  }

  if (dest_codepage_num<0 && GetCPInfoEx(dest_cp,0,&info)) {
    idx=0;
    while (idx<curcodepage && codepages[idx].cp!=info.CodePage)
      ++idx;
    if (idx<curcodepage)
      dest_codepage_num=idx;
  }
}

// find locale name
/*
 * Fill in src_locale_num when it is still negative: the first languages[]
 * row whose lcid equals src_lcid wins; without a match the index stays as is.
 */
void  get_locale_name(void) {
  int idx;

  if (src_locale_num>=0)
    return; // already resolved

  idx=0;
  while (idx<curlang && languages[idx].lcid!=src_lcid)
    ++idx;
  if (idx<curlang)
    src_locale_num=idx;
}

// print codepage info
/*
 * Print the source language, source code page and output code page.
 * A line is printed only when the corresponding table index is resolved
 * (non-negative).
 */
void  print_cp_lang_info(void) {
  if (src_locale_num>=0) {
    _tprintf(_T("Source language: %x %s.%s\n"),
             languages[src_locale_num].lcid,
             languages[src_locale_num].lang,
             languages[src_locale_num].country);
  }

  if (src_codepage_num>=0) {
    _tprintf(_T("Source code page: %d %s\n"),
             codepages[src_codepage_num].cp,
             codepages[src_codepage_num].name);
  }

  if (dest_codepage_num>=0) {
    _tprintf(_T("Output code page: %d %s\n"),
             codepages[dest_codepage_num].cp,
             codepages[dest_codepage_num].name);
  }
}

// decompile an existing dictionary
/*
 * Decompile an existing dictionary: read ifile and write the text of every
 * entry to ofile, one entry per line (keys are skipped, not written).
 *
 * Two file signatures are accepted: "DICq" (mode 0), whose block table rows
 * carry an extra paragraph count, and "DICt" (mode 1), whose rows do not.
 * The header sets src_lcid and src_cp; dest_cp follows src_cp unless an
 * output code page was selected beforehand.
 *
 * ifile - dictionary to read
 * ofile - text file to create
 *
 * On a malformed file, a zlib failure or an output write error the partial
 * output file is removed and the program exits with status 1.
 */
void  decode_words(const char *ifile,const char *ofile) {
  FILE	  *ifp,*ofp;
  char	  buf[5];
  int	  *csizes;
  int	  *sizes;
  long	  *offs;
  int	  nblk;
  int	  totwords,nwords;
  long	  blocktaboff,boff;
  int	  i;
  char	  *cblk,*blk;
  char	  *cp,*ep,*pp;
  int	  mode;

  if ((ifp=fopen(ifile,"rb"))==NULL)
    liberr(_T("Can't open file"));
  setvbuf(ifp,NULL,_IOFBF,65536);
  if ((ofp=fopen(ofile,"w"))==NULL)
    liberr(_T("Can't open file"));
  setvbuf(ofp,NULL,_IOFBF,65536);
  // a file shorter than the signature cannot be a dictionary
  if (fread(buf,4,1,ifp)!=1)
    goto invalid;
  buf[4]='\0';
  if (strcmp(buf,"DICq")==0)
    mode=0;
  else if (strcmp(buf,"DICt")==0)
    mode=1;
  else
    goto invalid;
  src_lcid=getdword(ifp);
  totwords=getdword(ifp);
  src_cp=getdword(ifp);
  if (dest_codepage_num<0)
    dest_cp=src_cp;
  get_cp_name();
  get_locale_name();
  print_cp_lang_info();
  _tprintf(_T("Unpacking... ")); fflush(stdout);
  blocktaboff=getdword(ifp);
  if (fseek(ifp,blocktaboff,SEEK_SET)!=0)
    goto invalid;
  nblk=getdword(ifp);
  // getdword() cannot report end-of-file itself, so test the stream state
  if (nblk<0 || feof(ifp) || ferror(ifp))
    goto invalid;
  csizes=xmalloc(nblk*sizeof *csizes);
  sizes=xmalloc(nblk*sizeof *sizes);
  offs=xmalloc(nblk*sizeof *offs); // element type is long, not int
  // read block table; blocks are stored back to back right after the
  // 20-byte header, so offsets are the running sum of the stored sizes
  for (i=0,boff=20;i<nblk;++i) {
    sizes[i]=getdword(ifp);
    csizes[i]=getdword(ifp);
    if (sizes[i]<0 || csizes[i]<0)
      goto invalid;
    // block data must end before the block table starts; this also keeps
    // the running offset from overflowing
    if (csizes[i]>blocktaboff-boff)
      goto invalid;
    offs[i]=boff;
    boff+=csizes[i];
    getdword(ifp); // skip key length
    getdword(ifp); // skip number of words in block
    if (mode==0)
      getdword(ifp); // skip number of paragraphs in block
  }
  if (feof(ifp) || ferror(ifp)) // table truncated
    goto invalid;
  // read blocks
  for (i=nwords=0;i<nblk;++i) {
    if (fseek(ifp,offs[i],SEEK_SET)!=0)
      goto invalid;
    cblk=xmalloc(csizes[i]);
    if (csizes[i]>0 && fread(cblk,csizes[i],1,ifp)!=1)
      goto invalid;
    if (csizes[i]<sizes[i]) { // decompress
      uLongf  dlen=sizes[i];
      int     res;
      blk=xmalloc(sizes[i]);
      res=uncompress((Bytef*)blk,&dlen,(Bytef*)cblk,csizes[i]);
      if (res!=Z_OK) {
	_ftprintf(stderr,_T("%s: zlib error: %d\n"),progname,res);
	goto error;
      }
      if (dlen!=(unsigned)sizes[i])
	goto invalid;
    } else { // stored raw: use the buffer that was just read
      blk=cblk;
      cblk=NULL;
    }
    // write entries
    for (cp=blk,ep=blk+sizes[i];cp<ep;) {
      // skip key
      while (cp<ep && *cp)
	++cp;
      // a key that runs to the end of the block has no entry after it
      if (cp>=ep)
	goto invalid;
      pp=++cp;
      // skip entry
      while (cp<ep && *cp)
	++cp;
      if (pp!=cp) // empty entries are counted but produce no output line
	write_str(pp,cp-pp,src_cp,dest_cp,ofp);
      if (cp<ep)
	++cp;
      ++nwords;
    }
    // cleanup
    free(blk);
    free(cblk);
  }
  if (nwords!=totwords)
    goto invalid;
  // make sure everything really reached the output file
  if (fflush(ofp)!=0 || ferror(ofp)) {
    _ftprintf(stderr,_T("%s: Can't write output file.\n"),progname);
    goto error;
  }
  free(offs);
  free(sizes);
  free(csizes);
  fclose(ifp);
  fclose(ofp);
  _tprintf(_T("done (%d entries).\n"),nwords);
  return;
invalid:
  _ftprintf(stderr,_T("%s: Invalid dictionary file.\n"),progname);
error:
  fclose(ifp);
  fclose(ofp);
  remove(ofile); // do not leave a partial output file behind
  exit(1);
}

/*
 * Program entry point.
 *
 * Options (parsed by xgetopt with the spec "LCl:c:o:d"):
 *   -L        showlocales()
 *   -C        showcodepages()
 *   -l name   source locale
 *   -c name   source code page (also becomes the output code page unless one
 *             has been chosen already)
 *   -o name   output code page
 *   -d        decode an existing dictionary instead of building one
 * After the options two file names are expected: source and destination.
 */
int   main(int argc,char **argv) {
  char	*arg;
  char	*state=NULL;
  int	opt;

  // setup default locale
  _tsetlocale(LC_ALL,_T(""));
  // fetch the list of installed locales and code pages
  EnumSystemLocales(EnumLocalesProc,LCID_INSTALLED|LCID_ALTERNATE_SORTS);
  EnumSystemCodePages(EnumCodePagesProc,CP_INSTALLED);
  qsort(languages,curlang,sizeof(struct lang),langcmp_lcid);

  // set program name, falling back to a fixed one
  if (argc>0) {
    progname=tchar(argv[0]);
  }
  if (progname==NULL) {
    progname=_T("mkdict");
  }

  // setup defaults: nothing resolved yet, user locale, OEM -> UTF-8
  src_locale_num=-1;
  src_codepage_num=-1;
  dest_codepage_num=-1;
  src_lcid=GetUserDefaultLCID();
  src_cp=CP_OEMCP;
  dest_cp=CP_UTF8;

  // process options
  if (argc<2) {
    usage();
  }
  --argc; ++argv; // skip program name
  for (;;) {
    opt=xgetopt(&argc,&argv,"LCl:c:o:d",&state,&arg);
    if (!opt)
      break;
    if (opt=='L') {
      showlocales();
    } else if (opt=='C') {
      showcodepages();
    } else if (opt=='c') {
      src_codepage_num=find_codepage(arg);
      src_cp=codepages[src_codepage_num].cp;
      if (dest_codepage_num<0) { // also set output cp
	dest_codepage_num=src_codepage_num;
	dest_cp=src_cp;
      }
    } else if (opt=='l') {
      src_locale_num=find_locale(arg);
      src_lcid=languages[src_locale_num].lcid;
    } else if (opt=='o') {
      dest_codepage_num=find_codepage(arg);
      dest_cp=codepages[dest_codepage_num].cp;
    } else if (opt=='d') {
      decode=1;
    }
  }
  if (argc<2) { // at least source and destination files is required
    usage();
  }

  // do the work
  if (decode) {
    // decode_words() resolves and prints locale / code page info itself
    decode_words(argv[0],argv[1]);
    return 0;
  }
  get_locale_name();
  get_cp_name();
  print_cp_lang_info();
  readfile(argv[0]);
  sortwords();
  mergewords();
  writewords(argv[1]);
  return 0;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -