📄 mkdict.c
字号:
_tprintf(_T("Merging... ")); fflush(stdout);
nwords=xmalloc(curword*sizeof(struct word));
for (cur=nwp=merged=0;cur<curword;)
if (cur<curword-1 && !strcmp(words[cur].key,words[cur+1].key)) {
totlen=words[cur].elen+words[cur+1].elen+1;
for (end=cur+2;end<curword&&!strcmp(words[cur].key,words[end].key);++end)
{
totlen+=words[end].elen;
totlen++;
}
merged+=end-cur-1;
ne=xmalloc(totlen+1);
memcpy(ne,words[cur].entry,words[cur].elen);
nwords[nwp].key=words[cur].key;
nwords[nwp].klen=words[cur].klen;
nwords[nwp].elen=totlen;
nwords[nwp].entry=ne;
++nwp;
cp=ne+words[cur].elen;
free((void*)words[cur].entry);
++cur;
while (cur<end) {
*cp++='\n';
memcpy(cp,words[cur].entry,words[cur].elen);
cp+=words[cur].elen;
free((void*)words[cur].entry);
free((void*)words[cur].key);
++cur;
}
*cp='\0';
} else {
nwords[nwp].entry=words[cur].entry;
nwords[nwp].elen=words[cur].elen;
nwords[nwp].key=words[cur].key;
nwords[nwp].klen=words[cur].klen;
++nwp;
++cur;
}
free(words);
words=nwords;
maxword=curword=nwp;
_tprintf(_T("done (%d merged).\n"),merged);
}
// blocks
// One record of the block table: what putblock() remembers about a block it
// has written, and what writewords() later stores in the file's block table.
struct block {
int size; // length of the block before compression, in bytes
int compressed_size; // bytes actually written to the file; equals size when the block was stored uncompressed
char *key; // heap copy of the NUL-terminated string at the start of the block (the block's first key)
int keylen; // strlen(key); the terminator is not counted
int nent; // number of entries in the block, as passed to putblock()
int npara; // nent plus the number of '\n' characters found in the entry texts
};
// Block table: filled by putblock(), written out by writewords().
struct block *blocks;
// curblock is the number of records in blocks[] that are in use. maxblock is
// handed to CHECKADD together with blocks and curblock; presumably it is the
// allocated capacity -- confirm against the macro definition.
int curblock,maxblock;
void putblock(const char *block,int blen,int nent,FILE *fp) {
char compressed[BLOCK];
int res;
uLongf dsize=sizeof(compressed);
int docomp=0;
const char *bend;
CHECKADD(blocks,curblock,maxblock);
res=compress2((Bytef *)compressed,&dsize,(Bytef *)block,blen,9);
if (res==Z_MEM_ERROR) // don't want to compress this
fwrite(block,blen,1,fp);
else if (res==Z_OK) {
if (dsize>=(unsigned)blen) // write uncompressed
fwrite(block,blen,1,fp);
else {
docomp=1;
fwrite(compressed,dsize,1,fp);
}
} else {
_ftprintf(stderr,_T("%s: zlib error: %d\n"),progname,res);
exit(1);
}
blocks[curblock].size=blen;
blocks[curblock].compressed_size=docomp ? dsize : blen;
blocks[curblock].keylen=strlen(block);
blocks[curblock].key=xmalloc(blocks[curblock].keylen+1);
memcpy(blocks[curblock].key,block,blocks[curblock].keylen+1);
blocks[curblock].nent=nent;
for (bend=block+blen;block<bend;) {
while (block<bend && *block)
++block;
if (block<bend)
++block;
while (block<bend && *block) {
if (*block=='\n')
++nent;
++block;
}
if (block<bend)
++block;
}
blocks[curblock].npara=nent;
++curblock;
}
// Write a 16-bit value to fp, low byte first.
void putword(WORD w,FILE *fp) {
  int low=w&0xff;
  int high=w>>8;

  putc(low,fp);
  putc(high,fp);
}
// Write a 32-bit value to fp as four bytes, least significant byte first.
void putdword(DWORD d,FILE *fp) {
  int shift;

  for (shift=0;shift<32;shift+=8)
    putc((int)((d>>shift)&0xff),fp);
}
DWORD getdword(FILE *fp) {
BYTE b1,b2,b3,b4;
b1=getc(fp); b2=getc(fp); b3=getc(fp); b4=getc(fp);
return (DWORD)b4<<24|(DWORD)b3<<16|(DWORD)b2<<8|b1;
}
WORD getword(FILE *fp) {
BYTE b1,b2;
b1=getc(fp); b2=getc(fp);
return (WORD)b2<<8|b1;
}
// Write the global words[] array to a dictionary file.
// File layout produced here:
//   "DICq"
//   dword key locale, dword entry count, dword encoding,
//   dword block table offset (patched in at the end, file offset 16)
//   the blocks, each a run of "key\0entry\0" pairs of at most BLOCK bytes,
//     written through putblock()
//   block table: dword block count, then five dwords per block
//   the first key of every block, without terminators
// Exits if the file cannot be opened, an entry does not fit into a block,
// or the file cannot be written completely.
void writewords(const char *filename) {
  FILE *fp;
  int i;
  char block[BLOCK];
  int blen,nent;
  long tboff,size;
  int failed=0;

  _tprintf(_T("Writing... ")); fflush(stdout);
  if ((fp=fopen(filename,"wb"))==NULL)
    liberr(_T("Can't open file"));
  setvbuf(fp,NULL,_IOFBF,65536);
  // write header
  fputs("DICq",fp);
  putdword(src_lcid,fp); // key locale
  putdword(curword,fp); // total number of entries
  putdword(dest_cp,fp); // encoding
  putdword(0,fp); // block table offset
  // write blocks
  for (i=blen=nent=0;i<curword;++i) {
    // key and entry, each with its terminator, must fit into one block
    if (words[i].klen+words[i].elen+2>BLOCK) {
      _ftprintf(stderr,_T("%s: Entry too long.\n"),progname);
      exit(1);
    }
    if (blen+words[i].klen+1+words[i].elen+1>BLOCK) { // flush block
      putblock(block,blen,nent,fp);
      blen=nent=0;
      // progress indicator: one dot every eight blocks
      if ((curblock&7)==0) {
        putc('.',stdout);
        fflush(stdout);
      }
    }
    memcpy(block+blen,words[i].key,words[i].klen+1);
    blen+=words[i].klen+1;
    memcpy(block+blen,words[i].entry,words[i].elen+1);
    blen+=words[i].elen+1;
    ++nent;
  }
  if (nent>0)
    putblock(block,blen,nent,fp);
  tboff=ftell(fp);
  // write block table
  putdword(curblock,fp); // number of blocks
  for (i=0;i<curblock;++i) {
    putdword(blocks[i].size,fp); // uncompressed size
    putdword(blocks[i].compressed_size,fp); // compressed size
    putdword(blocks[i].keylen,fp); // key length
    putdword(blocks[i].nent,fp); // number of words in this block
    putdword(blocks[i].npara,fp); // number of "paragraphs" in this block
  }
  for (i=0;i<curblock;++i) // keys
    fwrite(blocks[i].key,blocks[i].keylen,1,fp);
  if (fflush(fp)!=0)
    failed=1;
  size=ftell(fp);
  // write block table offset
  // (16 = the 4-byte signature plus the three dwords in front of the field)
  if (fseek(fp,16,SEEK_SET)!=0)
    failed=1;
  putdword(tboff,fp);
  // A full disk or other I/O error must not be reported as success: the
  // stream error flag catches any failed write above, and fclose() catches
  // a failure while flushing the last buffer.
  if (tboff<0 || size<0 || ferror(fp))
    failed=1;
  if (fclose(fp)!=0)
    failed=1;
  if (failed)
    liberr(_T("Can't write file")); // assumed not to return, as after the failed fopen above
  _tprintf(_T(" done (%d block(s), %ld byte(s)).\n"),curblock,size);
}
// Write len bytes of s to fp as a single text line, converting from code
// page srccp to code page dstcp. Newlines inside the text are replaced by
// tabs and one '\n' is appended. When both code pages are equal the bytes
// are copied without conversion. Conversion failures are reported through
// syserror().
void write_str(const char *s,int len,UINT srccp,UINT dstcp,FILE *fp) {
  wchar_t *wide;
  char *out;
  int nwide,nout;
  int k;

  if (srccp==dstcp) {
    // same encoding on both sides: plain byte copy
    for (k=0;k<len;++k)
      putc(s[k]=='\n' ? '\t' : s[k],fp);
    putc('\n',fp);
    return;
  }
  if (len==0) {
    // nothing to convert, emit just the line terminator
    putc('\n',fp);
    return;
  }

  // source code page -> UTF-16: ask for the size first, then convert
  nwide=MultiByteToWideChar(srccp,0,s,len,NULL,0);
  if (nwide==0)
    syserror(_T("Can't convert string"));
  wide=xmalloc(nwide*sizeof(wchar_t));
  MultiByteToWideChar(srccp,0,s,len,wide,nwide);

  // UTF-16 -> destination code page, same two-step scheme
  nout=WideCharToMultiByte(dstcp,0,wide,nwide,NULL,0,NULL,NULL);
  if (nout==0)
    syserror(_T("Can't convert string"));
  out=xmalloc(nout);
  WideCharToMultiByte(dstcp,0,wide,nwide,out,nout,NULL,NULL);

  // keep the whole entry on one output line
  for (k=0;k<nout;++k) {
    if (out[k]=='\n')
      out[k]='\t';
  }
  fwrite(out,nout,1,fp);
  putc('\n',fp);

  free(out);
  free(wide);
}
// find code page names
void get_cp_name(void) {
int i;
if (src_codepage_num<0) {
CPINFOEX iex;
if (GetCPInfoEx(src_cp,0,&iex)) {
for (i=0;i<curcodepage;++i)
if (codepages[i].cp==iex.CodePage) {
src_codepage_num=i;
break;
}
}
}
if (dest_codepage_num<0) {
CPINFOEX iex;
if (GetCPInfoEx(dest_cp,0,&iex)) {
for (i=0;i<curcodepage;++i)
if (codepages[i].cp==iex.CodePage) {
dest_codepage_num=i;
break;
}
}
}
}
// find locale name
// If src_locale_num is still negative, set it to the index of the first
// languages[] entry whose lcid equals src_lcid. It stays unchanged when no
// entry matches.
void get_locale_name(void) {
  int idx;

  if (src_locale_num>=0)
    return; // already resolved
  for (idx=0;idx<curlang;++idx) {
    if (languages[idx].lcid==src_lcid) {
      src_locale_num=idx;
      return;
    }
  }
}
// print codepage info
// Prints one line each for the source language, the source code page and
// the output code page; a line is omitted while its table index is still
// negative.
void print_cp_lang_info(void) {
  if (src_locale_num>=0) {
    _tprintf(_T("Source language: %x %s.%s\n"),
      languages[src_locale_num].lcid,
      languages[src_locale_num].lang,
      languages[src_locale_num].country);
  }
  if (src_codepage_num>=0) {
    _tprintf(_T("Source code page: %d %s\n"),
      codepages[src_codepage_num].cp,
      codepages[src_codepage_num].name);
  }
  if (dest_codepage_num>=0) {
    _tprintf(_T("Output code page: %d %s\n"),
      codepages[dest_codepage_num].cp,
      codepages[dest_codepage_num].name);
  }
}
// decompile an existing dictionary
// Reads the dictionary ifile and writes the text of every entry to ofile,
// one line per entry, converted from the dictionary's code page to dest_cp
// (which defaults to the dictionary's code page unless an output code page
// was selected). Keys are skipped; only entry texts are written.
// Accepted signatures are "DICq" and "DICt"; they differ in that "DICq"
// block table records carry an extra paragraph count.
// Block data is taken to start at file offset 20 and to be stored back to
// back in table order. On any inconsistency the output file is removed and
// the program exits with status 1.
void decode_words(const char *ifile,const char *ofile) {
  FILE *ifp,*ofp;
  char buf[5];
  int *csizes;
  int *sizes;
  long *offs;
  int nblk;
  int totwords,nwords;
  long blocktaboff,boff;
  int i;
  char *cblk,*blk;
  char *cp,*ep,*pp;
  int mode;

  if ((ifp=fopen(ifile,"rb"))==NULL)
    liberr(_T("Can't open file"));
  setvbuf(ifp,NULL,_IOFBF,65536);
  if ((ofp=fopen(ofile,"w"))==NULL)
    liberr(_T("Can't open file"));
  setvbuf(ofp,NULL,_IOFBF,65536);

  // signature; a file shorter than four bytes is not a dictionary
  if (fread(buf,4,1,ifp)!=1)
    goto invalid;
  buf[4]='\0';
  if (strcmp(buf,"DICq")==0)
    mode=0;
  else if (strcmp(buf,"DICt")==0)
    mode=1;
  else
    goto invalid;
  src_lcid=getdword(ifp);
  totwords=getdword(ifp);
  src_cp=getdword(ifp);
  if (dest_codepage_num<0)
    dest_cp=src_cp;
  get_cp_name();
  get_locale_name();
  print_cp_lang_info();
  _tprintf(_T("Unpacking... ")); fflush(stdout);

  blocktaboff=getdword(ifp);
  if (fseek(ifp,blocktaboff,SEEK_SET)!=0)
    goto invalid;
  nblk=getdword(ifp);
  // the count comes straight from the file: reject values that are negative
  // or would overflow the allocation sizes below
  if (nblk<0 || (size_t)nblk>(size_t)-1/sizeof *offs)
    goto invalid;
  csizes=xmalloc(nblk*sizeof *csizes);
  sizes=xmalloc(nblk*sizeof *sizes);
  offs=xmalloc(nblk*sizeof *offs); // elements are long, not int
  // read block table
  for (i=0,boff=20;i<nblk;++i) {
    sizes[i]=getdword(ifp);
    csizes[i]=getdword(ifp);
    if (sizes[i]<0 || csizes[i]<0)
      goto invalid;
    offs[i]=boff;
    boff+=csizes[i];
    getdword(ifp); // skip key length
    getdword(ifp); // skip number of words in block
    if (mode==0)
      getdword(ifp); // skip number of paragraphs in block
  }
  // getdword() cannot report a short read, so check the stream afterwards
  if (feof(ifp) || ferror(ifp))
    goto invalid;

  // read blocks
  for (i=nwords=0;i<nblk;++i) {
    if (fseek(ifp,offs[i],SEEK_SET)!=0)
      goto invalid;
    cblk=xmalloc(csizes[i]);
    if (csizes[i]>0 && fread(cblk,csizes[i],1,ifp)!=1)
      goto invalid;
    if (csizes[i]<sizes[i]) { // decompress
      uLongf dlen=sizes[i];
      int res;
      blk=xmalloc(sizes[i]);
      res=uncompress((Bytef*)blk,&dlen,(Bytef*)cblk,csizes[i]);
      if (res!=Z_OK) {
        _ftprintf(stderr,_T("%s: zlib error: %d\n"),progname,res);
        goto error;
      }
      if (dlen!=(unsigned)sizes[i])
        goto invalid;
    } else {
      // stored uncompressed: use the raw bytes directly
      blk=cblk;
      cblk=NULL;
    }
    // write entries
    for (cp=blk,ep=blk+sizes[i];cp<ep;) {
      // skip key
      while (cp<ep && *cp)
        ++cp;
      if (cp<ep)
        ++cp;
      // The entry text starts here. If the block ended inside the key,
      // pp==cp==ep and the entry is empty, so pp is never left
      // uninitialised or pointing into a previous entry.
      pp=cp;
      // skip entry
      while (cp<ep && *cp)
        ++cp;
      if (pp!=cp)
        write_str(pp,(int)(cp-pp),src_cp,dest_cp,ofp);
      if (cp<ep)
        ++cp;
      ++nwords;
    }
    // cleanup
    free(blk);
    free(cblk);
  }
  if (nwords!=totwords)
    goto invalid;
  free(offs);
  free(sizes);
  free(csizes);
  fclose(ifp);
  fclose(ofp);
  _tprintf(_T("done (%d entries).\n"),nwords);
  return;
invalid:
  _ftprintf(stderr,_T("%s: Invalid dictionary file.\n"),progname);
error:
  // do not leave a partial output file behind
  fclose(ifp);
  fclose(ofp);
  remove(ofile);
  exit(1);
}
// Program entry point: parse the command line, then either decode an
// existing dictionary (-d) or build a new one from a source file.
// Options: -L and -C call showlocales() / showcodepages(), -l selects the
// source locale, -c the source code page (and the output code page too if
// none was chosen yet), -o the output code page, -d selects decoding.
// Two file names (input, output) must remain after the options.
int main(int argc,char **argv) {
  char *state=NULL;
  char *arg;
  int opt;

  // setup default locale
  _tsetlocale(LC_ALL,_T(""));

  // fetch the list of installed locales and code pages
  EnumSystemLocales(EnumLocalesProc,LCID_INSTALLED|LCID_ALTERNATE_SORTS);
  EnumSystemCodePages(EnumCodePagesProc,CP_INSTALLED);
  qsort(languages,curlang,sizeof(struct lang),langcmp_lcid);

  // set program name
  if (argc>0)
    progname=tchar(argv[0]);
  if (progname==NULL)
    progname=_T("mkdict");

  // setup defaults
  src_locale_num=-1;
  src_lcid=GetUserDefaultLCID();
  src_codepage_num=-1;
  dest_codepage_num=-1;
  src_cp=CP_OEMCP;
  dest_cp=CP_UTF8;

  // process options
  if (argc<2)
    usage();
  --argc; ++argv; // skip program name
  for (;;) {
    opt=xgetopt(&argc,&argv,"LCl:c:o:d",&state,&arg);
    if (!opt)
      break;
    if (opt=='L') {
      showlocales();
    } else if (opt=='C') {
      showcodepages();
    } else if (opt=='c') {
      src_codepage_num=find_codepage(arg);
      src_cp=codepages[src_codepage_num].cp;
      if (dest_codepage_num<0) { // also set output cp
        dest_codepage_num=src_codepage_num;
        dest_cp=src_cp;
      }
    } else if (opt=='l') {
      src_locale_num=find_locale(arg);
      src_lcid=languages[src_locale_num].lcid;
    } else if (opt=='o') {
      dest_codepage_num=find_codepage(arg);
      dest_cp=codepages[dest_codepage_num].cp;
    } else if (opt=='d') {
      decode=1;
    }
  }
  if (argc<2) // both a source and a destination file are required
    usage();

  // do the work
  if (decode) {
    // decode_words() resolves and prints locale and code page info itself
    decode_words(argv[0],argv[1]);
    return 0;
  }
  // try to find locale and code page ids, print them, then compile
  get_locale_name();
  get_cp_name();
  print_cp_lang_info();
  readfile(argv[0]);
  sortwords();
  mergewords();
  writewords(argv[1]);
  return 0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -