📄 mkdict.c
字号:
#include <windows.h>
#include <stdio.h>
#include <locale.h>
#include <tchar.h>
#include "zlib.h"
#define BLOCK 65536
// global stuff
// program name; used as the prefix of every diagnostic message
const TCHAR *progname;
// NOTE(review): presumably indexes into the code page / language tables
// selected on the command line -- their use is not visible here, confirm
int src_codepage_num;
int src_locale_num;
int dest_codepage_num;
// code page addword() uses to convert input lines to wide characters
UINT src_cp;
// locale addword() passes to LCMapStringW() when building sort keys
LCID src_lcid;
// code page addword() uses to encode the stored entry text
UINT dest_cp;
// NOTE(review): presumably set by the -d (unpack) option -- confirm
int decode;
// memory allocation
void nomem(void) {
_ftprintf(stderr,_T("%s: Out of memory!\n"),progname);
exit(1);
}
// malloc() wrapper: never returns NULL, calls nomem() (which exits) instead.
void *xmalloc(size_t size) {
  void *block = malloc(size);

  if (!block)
    nomem();
  return block;
}
// realloc() wrapper: a NULL result is only tolerated for a zero-size request;
// any other failure goes through nomem(), which exits.
void *xrealloc(void *m, size_t size) {
  void *block = realloc(m, size);

  if (block != NULL || size == 0)
    return block;
  nomem();
  return block;
}
// _tcsdup() wrapper: returns a heap copy of s, or exits via nomem() when the
// copy cannot be allocated. The caller owns the returned string.
TCHAR *xstrdup(const TCHAR *s) {
  TCHAR *copy = _tcsdup(s);

  if (!copy)
    nomem();
  return copy;
}
// error handling
void syserror(const TCHAR *msg) {
DWORD code=GetLastError();
LPTSTR *errmsg;
if (FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER|FORMAT_MESSAGE_FROM_SYSTEM,
NULL,code,0,(LPTSTR)&errmsg,0,NULL))
_ftprintf(stderr,_T("%s: %s: %s\n"),progname,msg,errmsg);
else
_ftprintf(stderr,_T("%s: %s: %s\n"),progname,msg,_T("Unknown error"));
exit(1);
}
// Print "<progname>: <msg>: <strerror(errno)>" for a failed C library call
// and terminate with status 1.
//   msg - short description of the operation that failed
void liberr(const TCHAR *msg) {
  // Resolve errno before doing anything else that could modify it.
  const char *reason = strerror(errno);

  // strerror() returns a narrow string, so a UNICODE build needs %S.
  // The line is newline-terminated like the messages printed by syserror().
  _ftprintf(stderr,
#ifdef UNICODE
            _T("%s: %s: %S\n"),
#else
            _T("%s: %s: %s\n"),
#endif
            progname, msg, reason);
  exit(1);
}
// Return a newly allocated TCHAR copy of the narrow string s.
// In a UNICODE build the text is converted from the OEM code page; otherwise
// it is simply duplicated. The caller frees the result.
TCHAR *tchar(const char *s) {
#ifndef UNICODE
  return xstrdup(s);
#else
  int nbytes = strlen(s);
  int nwide;
  TCHAR *out;

  // MultiByteToWideChar() rejects a zero-length input, so handle it here
  if (nbytes == 0)
    return xstrdup(_T(""));

  // first pass: ask how many wide characters are needed
  nwide = MultiByteToWideChar(CP_OEMCP, 0, s, nbytes, NULL, 0);
  if (nwide == 0)
    syserror(_T("Can't convert string"));

  // second pass: convert, then terminate by hand (input length excluded NUL)
  out = xmalloc((nwide + 1) * sizeof(TCHAR));
  MultiByteToWideChar(CP_OEMCP, 0, s, nbytes, out, nwide);
  out[nwide] = '\0';
  return out;
#endif
}
// custom getopt
// Parse the next single-letter option from the argument vector.
//   argc, argv - remaining arguments; both are advanced past every argument
//                this function consumes
//   ospec      - accepted option letters; a letter followed by ':' takes an
//                argument
//   state      - cursor inside the current "-abc" cluster; it must point to
//                NULL (or an empty string) before the first call
//   arg        - receives the option's argument when the option takes one
// Returns the option letter (as unsigned char), or 0 when there are no more
// options. An unknown option or a missing argument prints a message and
// exits with status 1.
int xgetopt(int *argc,char ***argv,const char *ospec,
const char **state,const char **arg)
{
const char *cp;
char opt;
if (!*state || !(*state)[0]) { // look at the next arg
if (!*argc || !(*argv)[0] || (*argv)[0][0]!='-') // no more options
return 0;
if (!(*argv)[0][1]) // a lone '-', treat as an end of list
return 0;
if ((*argv)[0][1]=='-') { // '--', ignore rest of text and stop
--*argc; ++*argv;
return 0;
}
// start scanning just past the leading '-' and consume the argument
*state=(*argv)[0]+1;
--*argc;
++*argv;
}
// we are in a middle of an arg
opt=*(*state)++;
// search the spec, stepping over the ':' markers so that ':' itself can
// never match as an option letter
for (cp=ospec;*cp;++cp) {
if (*cp==opt)
goto found;
if (cp[1]==':')
++cp;
}
_ftprintf(stderr,_T("%s: Invalid option: '%c'\n"),progname,opt);
exit(1);
found:
if (cp[1]==':') { // option requires an argument
if (**state) { // use rest of string
*arg=*state;
// the rest of the cluster was the argument; force a fresh arg next call
*state=NULL;
return (unsigned char)opt;
}
// use next arg if available
if (*argc) {
*arg=(*argv)[0];
--*argc;
++*argv;
return (unsigned char)opt;
}
// barf about missing args
_ftprintf(stderr,_T("%s: Option '%c' requires an argument\n"),progname,opt);
exit(1);
}
// just return current option
return (unsigned char)opt;
}
// hexdump
// Print the code units of the NUL-terminated wide string str to stdout as
// 4-digit hex values, eight per row.
void hexdump(const wchar_t *str) {
  size_t len = wcslen(str);

  while (len > 0) {
    int i;

    printf(" ");
    // The column counter must count up: counting down never reaches the
    // limit of 8, which put the whole string on a single row.
    for (i = 0; i < 8 && len > 0; ++i, --len, ++str)
      printf("%04x ", (unsigned)*str);
    printf("\n");
  }
}
// generic arrays support
// Enlarge the array *arr (elements of itemsize bytes) and store the new
// capacity in *maxitems. An empty array starts at 32 items; otherwise the
// capacity doubles, with each growth step capped at 8192 items.
void growarray(void **arr, int itemsize, int *maxitems) {
  int capacity = *maxitems;
  int step;

  if (capacity == 0)
    step = 32;
  else if (capacity > 8192)
    step = 8192;
  else
    step = capacity;

  capacity += step;
  *arr = xrealloc(*arr, capacity * itemsize);
  *maxitems = capacity;
}
// Make sure there is room for one more element before storing at ptr[cur]:
// when the used count cur has reached the capacity max, grow the array.
// ptr and max must be lvalues, because growarray() updates both in place.
#define CHECKADD(ptr,cur,max) do { \
if ((cur)>=(max)) \
growarray((void **)&(ptr), \
sizeof((ptr)[0]),&(max)); \
} while (0)
// languages support
// One locale collected by EnumLocalesProc().
struct lang {
LCID lcid; // locale identifier
TCHAR *country; // English country name, heap copy made by addlang()
TCHAR *lang; // English language name, heap copy made by addlang()
};
// growable table of locales, filled through addlang()
struct lang *languages;
// number of used entries / allocated capacity of languages
int curlang,maxlang;
// Append a locale to the languages table, growing it when needed.
// Both strings are copied, so the caller keeps ownership of its arguments.
void addlang(LCID lcid, const TCHAR *country, const TCHAR *lang) {
  struct lang *slot;

  CHECKADD(languages, curlang, maxlang);
  slot = &languages[curlang];
  slot->lcid = lcid;
  slot->country = xstrdup(country);
  slot->lang = xstrdup(lang);
  ++curlang;
}
// Locale enumeration callback: name is parsed as a hexadecimal LCID, and the
// locale is recorded via addlang() with its English country and language
// names. Always returns TRUE; names that do not parse as hex, and locales
// whose country name cannot be obtained, are skipped.
BOOL CALLBACK EnumLocalesProc(LPTSTR name) {
  unsigned int id; // %x needs an unsigned int object, not an LCID
  LCID lcid;
  TCHAR country[1024], lang[1024];
  int ret;

  if (_stscanf(name, _T("%x"), &id) != 1)
    return TRUE;
  lcid = id;

  // Offer one element less than the buffer holds, so the explicit
  // terminator written at [ret] below always stays inside the array.
  ret = GetLocaleInfo(lcid, LOCALE_SENGCOUNTRY, country,
                      sizeof(country) / sizeof(TCHAR) - 1);
  if (ret == 0)
    return TRUE;
  country[ret] = '\0';

  // Same size rule as above: passing the full size here allowed a write
  // one past the end of lang. A failure (ret == 0) yields an empty
  // language name, and the locale is still recorded.
  ret = GetLocaleInfo(lcid, LOCALE_SENGLANGUAGE, lang,
                      sizeof(lang) / sizeof(TCHAR) - 1);
  lang[ret] = '\0';

  addlang(lcid, country, lang);
  return TRUE;
}
// qsort() comparator for struct lang: case-insensitive order by language
// name, ties broken by country name.
int langcmp(const void *v1, const void *v2) {
  const struct lang *a = v1;
  const struct lang *b = v2;
  int order = _tcsicmp(a->lang, b->lang);

  return order != 0 ? order : _tcsicmp(a->country, b->country);
}
// qsort() comparator for struct lang: case-insensitive order by language
// name, ties broken by ascending LCID.
int langcmp_lcid(const void *v1, const void *v2) {
  const struct lang *a = v1;
  const struct lang *b = v2;
  int order = _tcsicmp(a->lang, b->lang);

  if (order != 0)
    return order;
  if (a->lcid < b->lcid)
    return -1;
  return a->lcid == b->lcid ? 0 : 1;
}
// row formats of the locale listing: header row and data row
#define LANGFMT1 " %6s %-30s %-30s\n"
#define LANGFMT2 " %6x %-30s %-30s\n"
// Print the collected locales, sorted by language and country, then
// terminate with status 0. Sorts the languages table in place.
void showlocales(void) {
  int i;

  qsort(languages, curlang, sizeof(struct lang), langcmp);
  _tprintf(_T("Installed locales:\n"));
  _tprintf(_T(LANGFMT1), _T("LCID"), _T("Language"), _T("Country"));
  // separator line under the header
  for (i = 0; i < 70; ++i)
    putc('-', stdout);
  putc('\n', stdout);
  for (i = 0; i < curlang; ++i)
    _tprintf(_T(LANGFMT2),
             (unsigned)languages[i].lcid, // %x takes an unsigned int
             languages[i].lang,
             languages[i].country);
  exit(0);
}
// Look up a locale given as "language", "language.country" or a hexadecimal
// LCID, and return its index in the languages table. Name matching is
// case-insensitive and tried first; the LCID interpretation is the fallback.
// Prints a message and exits with status 1 when nothing matches.
int find_locale(const char *l) {
  TCHAR *name = tchar(l);
  TCHAR *region = _tcschr(name, _T('.'));
  LCID wanted;
  int idx;

  // split "language.country" in place
  if (region != NULL)
    *region++ = '\0';

  for (idx = 0; idx < curlang; ++idx) {
    if (_tcsicmp(name, languages[idx].lang) != 0)
      continue;
    if (region == NULL || _tcsicmp(region, languages[idx].country) == 0) {
      free(name);
      return idx;
    }
  }

  if (_stscanf(name, _T("%x"), &wanted) == 1) {
    for (idx = 0; idx < curlang; ++idx) {
      if (languages[idx].lcid == wanted) {
        free(name);
        return idx;
      }
    }
  }

  _ftprintf(stderr, _T("%s: Language '%s' not found.\n"), progname, name);
  exit(1);
}
// code pages support
// One code page collected by EnumCodePagesProc().
struct codepage {
UINT cp; // code page number
const TCHAR *name; // display name, heap copy made by addcodepage()
};
// growable table of code pages, filled through addcodepage()
struct codepage *codepages;
// number of used entries / allocated capacity of codepages
int curcodepage,maxcodepage;
// Append a code page to the codepages table, growing it when needed.
// The name is copied, so the caller keeps ownership of its argument.
void addcodepage(UINT id, const TCHAR *name) {
  struct codepage *slot;

  CHECKADD(codepages, curcodepage, maxcodepage);
  slot = &codepages[curcodepage];
  slot->cp = id;
  slot->name = xstrdup(name);
  ++curcodepage;
}
// Code page enumeration callback: name is parsed as a decimal code page
// number, and the page is recorded via addcodepage(). Always returns TRUE;
// entries that fail to parse or to resolve with GetCPInfoEx() are skipped.
// The stored name is the text between the first '(' and the following ')'
// of CodePageName, or the whole CodePageName when there is no such pair.
BOOL CALLBACK EnumCodePagesProc(LPTSTR name) {
  UINT id;
  CPINFOEX iex;
  TCHAR *label;
  TCHAR *lbr;
  TCHAR *rbr;

  if (_stscanf(name, _T("%u"), &id) != 1 || !GetCPInfoEx(id, 0, &iex))
    return TRUE;

  // Default to the full name, so a name without any '(' no longer hands a
  // NULL pointer to addcodepage().
  label = iex.CodePageName;
  lbr = _tcschr(iex.CodePageName, _T('('));
  if (lbr != NULL) {
    rbr = _tcschr(lbr + 1, _T(')'));
    if (rbr != NULL) {
      *rbr = '\0';
      label = lbr + 1;
    }
  }
  addcodepage(iex.CodePage, label);
  return TRUE;
}
// qsort() comparator for struct codepage: ascending code page number.
int codepagecmp(const void *v1, const void *v2) {
  const struct codepage *a = v1;
  const struct codepage *b = v2;

  return (a->cp > b->cp) - (a->cp < b->cp);
}
// Print the collected code pages in ascending numeric order, then terminate.
// Sorts the codepages table in place.
void showcodepages(void) {
  int i;

  qsort(codepages, curcodepage, sizeof(struct codepage), codepagecmp);
  _tprintf(_T("Installed code pages:\n"));
  for (i = 0; i < curcodepage; ++i)
    _tprintf(_T(" %5u %s\n"), codepages[i].cp, codepages[i].name);
  // A completed listing is a success, the same as in showlocales().
  exit(0);
}
// Look up a code page and return its index in the codepages table.
// If cp starts with a number it is matched against the code page numbers
// only; otherwise it is matched case-insensitively against the names.
// Prints a message and exits with status 1 when nothing matches.
int find_codepage(const char *cp) {
  TCHAR *label = tchar(cp);
  UINT number;
  int numeric = sscanf(cp, "%d", &number) == 1;
  int idx;

  for (idx = 0; idx < curcodepage; ++idx) {
    int hit;

    if (numeric)
      hit = codepages[idx].cp == number;
    else
      hit = _tcsicmp(label, codepages[idx].name) == 0;
    if (hit) {
      free(label);
      return idx;
    }
  }

  _ftprintf(stderr, _T("%s: Codepage '%s' not found.\n"), progname, label);
  exit(1);
}
// usage
void usage(void) {
_tprintf(_T("Usage: %s [options] source destination\n")
_T(" Options:\n")
_T(" -L list available languages\n")
_T(" -C list available code pages\n")
_T(" -l <language> specify the language for dictionary keys\n")
_T(" -c <code page> specify source code page\n")
_T(" -o <code page> specify output code page\n")
_T(" -d unpack a compiled dictionary\n"),
progname
);
exit(1);
}
// word entry
// One dictionary entry as stored by addword().
struct word {
const char *entry; // entry text encoded in dest_cp, NUL-terminated heap buffer
int elen; // entry length in bytes, terminator not counted
const char *key; // sort key bytes produced by LCMapStringW(LCMAP_SORTKEY)
int klen; // key length in bytes, trailing NUL not counted
};
// growable table of entries, filled by addword()
struct word *words;
// number of used entries / allocated capacity of words
int curword,maxword;
// Convert one input line into a dictionary entry and append it to words.
//   entry - raw line text in the source code page (src_cp)
//   mblen - length of entry in bytes; an empty line is ignored
//   line  - input line number, used only in the error message
// The headword is the text before the first pair of consecutive spaces; its
// case-insensitive sort key for src_lcid becomes the entry's key. The whole
// line, with tabs turned into newlines, is stored re-encoded in dest_cp.
// Any conversion failure or a line without the separator exits with status 1.
void addword(const char *entry, int mblen, int line) {
  int wclen;
  int ulen;
  wchar_t *wentry;
  char *sortkey;
  char *uentry;
  int sortkeylen;
  int keychars = -1; // headword length in wide characters
  int i;

  if (mblen == 0) // ignore empty words
    return;

  // source code page -> wide characters (size query, then conversion)
  wclen = MultiByteToWideChar(src_cp, 0, entry, mblen, NULL, 0);
  if (wclen == 0)
    syserror(_T("Can't convert string to unicode"));
  wentry = xmalloc((wclen + 1) * sizeof(wchar_t));
  if (MultiByteToWideChar(src_cp, 0, entry, mblen, wentry, wclen) == 0)
    syserror(_T("Can't convert string to unicode"));
  wentry[wclen] = '\0';

  // locate the two-space separator; starting at 1 keeps the headword
  // from being empty
  for (i = 1; i < wclen - 1; ++i) {
    if (wentry[i] == ' ' && wentry[i + 1] == ' ') {
      keychars = i;
      break;
    }
  }
  if (keychars < 0) {
    // wentry is always a wide string, so it needs %ls: plain %s would be
    // read as a narrow string in a non-UNICODE build.
    _ftprintf(stderr, _T("%s: Invalid entry: '%ls' at line %d (%d,%d)\n"),
              progname, wentry, line, mblen, wclen);
    exit(1);
  }

  // build the sort key of the headword (sizes are in bytes for LCMAP_SORTKEY)
  sortkeylen = LCMapStringW(src_lcid, LCMAP_SORTKEY | NORM_IGNORECASE,
                            wentry, keychars, NULL, 0);
  if (sortkeylen == 0)
    syserror(_T("Can't get sort key"));
  sortkey = xmalloc(sortkeylen);
  if (LCMapStringW(src_lcid, LCMAP_SORTKEY | NORM_IGNORECASE,
                   wentry, keychars, (wchar_t *)sortkey, sortkeylen) == 0)
    syserror(_T("Can't get sort key"));
  --sortkeylen; // we don't want an extra NUL byte

  // replace tabs with '\n'
  for (i = 0; i < wclen; ++i)
    if (wentry[i] == '\t')
      wentry[i] = '\n';

  // wide characters -> destination code page
  ulen = WideCharToMultiByte(dest_cp, 0, wentry, wclen, NULL, 0, NULL, NULL);
  if (ulen == 0)
    syserror(_T("Can't convert string to multibyte"));
  uentry = xmalloc(ulen + 1);
  if (WideCharToMultiByte(dest_cp, 0, wentry, wclen, uentry, ulen,
                          NULL, NULL) == 0)
    syserror(_T("Can't convert string to multibyte"));
  uentry[ulen] = '\0';
  free(wentry);

  // add word; the table takes ownership of uentry and sortkey
  CHECKADD(words, curword, maxword);
  words[curword].entry = uentry;
  words[curword].elen = ulen;
  words[curword].key = sortkey;
  words[curword].klen = sortkeylen;
  ++curword;
}
// Read the source dictionary line by line and hand every non-empty line to
// addword(). Trailing CR/LF characters are stripped first. Exits through
// liberr() when the file cannot be opened.
void readfile(const char *filename) {
  char inbuf[65536];
  FILE *fp;
  int line;

  _tprintf(_T("Loading... "));
  fflush(stdout);

  fp = fopen(filename, "r");
  if (fp == NULL)
    liberr(_T("Can't open file"));
  setvbuf(fp, NULL, _IOFBF, 65536);

  // line counts every chunk returned by fgets(), including empty ones
  for (line = 1; fgets(inbuf, sizeof(inbuf), fp) != NULL; ++line) {
    int len = strlen(inbuf);

    while (len > 0) {
      char last = inbuf[len - 1];

      if (last != '\r' && last != '\n')
        break;
      --len;
    }
    inbuf[len] = '\0';
    if (len != 0)
      addword(inbuf, len, line);
  }

  fclose(fp);
  _tprintf(_T("done (%d entries).\n"), curword);
}
// qsort() comparator for struct word: byte-wise strcmp() of the sort keys.
int wordcmp(const void *v1, const void *v2) {
  const struct word *lhs = v1;
  const struct word *rhs = v2;

  return strcmp(lhs->key, rhs->key);
}
void sortwords(void) {
_tprintf(_T("Sorting... ")); fflush(stdout);
qsort(words,curword,sizeof(struct word),wordcmp);
_tprintf(_T("done.\n"));
}
void mergewords(void) {
struct word *nwords;
int nwp;
int cur,end,totlen;
char *ne,*cp;
int merged;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -