📄 charconv_win32.c
字号:
/**
 * @file   charconv_win32.c
 * @author Akinobu LEE
 * @date   Thu Feb 17 16:02:41 2005
 *
 * <EN>
 * @brief  Character set conversion using Win32 MultiByte function + libjcode
 *
 * Perform character set conversion using Windows native API
 * WideCharToMultiByte() and MultiByteToWideChar().  Conversion between
 * codepages of "ansi" "oem" "mac" "utf-7" "utf-8" or codepage number supported
 * at the running OS are supported using unicode as the intermediate form.
 *
 * Conversion from Japanese-euc ("euc-jp") is optionally supported by the
 * libjcode library.  (Translated from the original Japanese note: the
 * author states that Windows has no corresponding EUC codepage, so when
 * the source charset is euc-jp the text is first converted to SJIS by
 * libjcode and then converted to unicode.)
 * </EN>
 *
 * $Revision: 1.3 $
 *
 */
/*
 * Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
 * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology
 * All rights reserved
 */

#include <sent/stddefs.h>
#include <stdlib.h>

#ifdef CHARACTER_CONVERSION
#ifdef USE_WIN32_MULTIBYTE

#include <windows.h>
#include <winnls.h>
#include <jlib.h>

static boolean euctosjis = FALSE; ///< TRUE if use libjcode for euc->sjis conv.
static boolean only_euc_conv = FALSE; ///< Perform only euc->sjis
static unsigned int from_cp;	///< Source codepage
static unsigned int to_cp;	///< Target codepage

/** Result codes of lookup_codepage(). */
enum {
  CPLOOKUP_OK = 0,		///< Name resolved to a codepage
  CPLOOKUP_UNKNOWN,		///< Name is neither a known alias nor a well-formed number
  CPLOOKUP_INVALID		///< Well-formed number rejected by IsValidCodePage()
};

/**
 * Resolve a charset name or a decimal codepage number string to a
 * Win32 codepage identifier.
 *
 * The euc-jp aliases are NOT handled here, since they are accepted only
 * as a source charset and need extra state in the caller.
 *
 * @param name [in] charset alias or decimal codepage number string (non-NULL)
 * @param cp [out] resolved codepage; written on CPLOOKUP_OK, and on
 * CPLOOKUP_INVALID (holds the rejected number so it can be reported)
 *
 * @return one of the CPLOOKUP_* codes.
 */
static int
lookup_codepage(char *name, unsigned int *cp)
{
  if (strmatch(name, "ansi")) {
    /* ANSI codepage (MBCS) ex. shift-jis in Windows XP Japanese edition. */
    *cp = CP_ACP;
  } else if (strmatch(name, "mac")) {
    /* Macintosh codepage */
    *cp = CP_MACCP;
  } else if (strmatch(name, "oem")) {
    /* OEM localized default codepage */
    *cp = CP_OEMCP;
  } else if (strmatch(name, "utf-7")) {
    /* UTF-7 codepage */
    *cp = CP_UTF7;
  } else if (strmatch(name, "utf-8")) {
    /* UTF-8 codepage */
    *cp = CP_UTF8;
  } else if (strmatch(name, "sjis") ||
	     strmatch(name, "sjis-win") ||
	     strmatch(name, "shift-jis") ||
	     strmatch(name, "shift_jis")) {
    /* sjis codepage = 932 */
    *cp = 932;
  } else if (name[0] >= '0' && name[0] <= '9') {
    /* codepage number: the whole string must be a decimal number that
       fits in unsigned int; atoi() would silently accept "932abc" */
    char *end;
    unsigned long val = strtoul(name, &end, 10);

    if (*end != '\0' || (unsigned long)(unsigned int)val != val) {
      return CPLOOKUP_UNKNOWN;
    }
    *cp = (unsigned int)val;
    if (! IsValidCodePage(*cp)) {
      return CPLOOKUP_INVALID;
    }
  } else {
    return CPLOOKUP_UNKNOWN;
  }
  return CPLOOKUP_OK;
}

/**
 * Print the error messages for a failed lookup_codepage() call.
 *
 * @param status [in] CPLOOKUP_UNKNOWN or CPLOOKUP_INVALID
 * @param side [in] "source" or "target", inserted into the message
 * @param name [in] the name string that failed to resolve
 * @param cp [in] the rejected codepage number (used for CPLOOKUP_INVALID)
 */
static void
report_lookup_failure(int status, const char *side, char *name, unsigned int cp)
{
  if (status == CPLOOKUP_INVALID) {
    /* cp is unsigned, so print it with %u */
    j_printerr("Error: codepage #%u not found\n", cp);
  } else {
    j_printerr("Error: unknown %s codepage \"%s\"\n", side, name);
    j_printerr("Error: valids are \"ansi\", \"mac\", \"oem\", \"utf-7\", \"utf-8\" and codepage number\n");
    j_printerr("Error: the default local charcode can be speicified by \"ansi\".\n");
  }
}

/**
 * Setup charset conversion for win32.
 *
 * On success the file-scope conversion state (source/target codepage and
 * the euc->sjis flags) is updated for later calls of charconv_win32().
 * When @a tocode is NULL the state is left untouched and conversion is
 * just reported as disabled.
 *
 * @param fromcode [in] input charset code name or codepage number string, NULL invalid
 * @param tocode [in] output charset code name or codepage number string, or NULL when disable conversion
 * @param enable_conv [out] store whether conversion should be enabled or not
 *
 * @return TRUE on success, FALSE on failure (unknown codename or unsupported codepage).
 */
boolean
charconv_win32_setup(char *fromcode, char *tocode, boolean *enable_conv)
{
  unsigned int src_p, dst_p;
  int status;

  if (tocode == NULL) {
    /* just disable conversion */
    *enable_conv = FALSE;
    return TRUE;
  }

  /* determine source character set */
  if (fromcode == NULL) {
    j_printerr("Error: charset names of both input and output should be given.\n");
    j_printerr("Error: use \"-charconv from to\" instead of \"-kanji\".\n");
    *enable_conv = FALSE;
    return FALSE;
  }
  if (strmatch(fromcode, "euc-jp") ||
      strmatch(fromcode, "euc") ||
      strmatch(fromcode, "eucjp")) {
    /* pre-convert Japanese euc to Shift-jis,
       so the input seen by the Win32 API is Shift_jis (codepage 932) */
    euctosjis = TRUE;
    from_cp = 932;
  } else {
    euctosjis = FALSE;
    status = lookup_codepage(fromcode, &from_cp);
    if (status != CPLOOKUP_OK) {
      report_lookup_failure(status, "source", fromcode, from_cp);
      *enable_conv = FALSE;
      return FALSE;
    }
  }

  /* determine the target character set */
  status = lookup_codepage(tocode, &to_cp);
  if (status != CPLOOKUP_OK) {
    report_lookup_failure(status, "target", tocode, to_cp);
    *enable_conv = FALSE;
    return FALSE;
  }

  /* check whether the actual conversion is needed: resolve the "ansi"
     and "oem" pseudo codepages to the real ones before comparing */
  src_p = from_cp;
  dst_p = to_cp;
  if (src_p == CP_ACP) src_p = GetACP();
  if (dst_p == CP_ACP) dst_p = GetACP();
  if (src_p == CP_OEMCP) src_p = GetOEMCP();
  if (dst_p == CP_OEMCP) dst_p = GetOEMCP();

  if (src_p != dst_p) {
    /* different codepages: full conversion via unicode */
    only_euc_conv = FALSE;
    *enable_conv = TRUE;
  } else if (euctosjis == TRUE) {
    /* same codepage after euc->sjis: the pre-conversion alone suffices */
    only_euc_conv = TRUE;
    *enable_conv = TRUE;
  } else {
    /* identical codepages: nothing to do */
    only_euc_conv = FALSE;
    *enable_conv = FALSE;
  }

  return TRUE;
}

#define UNICODE_BUFFER_SIZE 4096 ///< Buffer length to use for unicode conversion
static wchar_t unibuf[UNICODE_BUFFER_SIZE]; ///< Local work area for unicode conversion

/**
 * Apply charset conversion to a string using win32 functions
 *
 * The conversion uses the state prepared by charconv_win32_setup() and a
 * single file-scope unicode work buffer of UNICODE_BUFFER_SIZE characters.
 * On any conversion error a message is printed and the unconverted
 * @a instr is returned.
 *
 * @param instr [in] source string
 * @param outstr [out] destination buffer
 * @param maxoutlen [in] allocated length of outstr in byte.
 *
 * @return either of instr or outstr, that holds the result string.
 *
 */
char *
charconv_win32(char *instr, char *outstr, int maxoutlen)
{
  int unilen, newlen;
  char *srcbuf = instr;

  if (euctosjis == TRUE) {
    /* euc->sjis conversion; the sjis text in outstr becomes the source */
    toStringSJIS(instr, outstr, maxoutlen);
    srcbuf = outstr;
    if (only_euc_conv) {
      return(outstr);
    }
  }

  /* get length of unicode string (in characters, including terminator) */
  unilen = MultiByteToWideChar(from_cp, 0, srcbuf, -1, NULL, 0);
  if (unilen <= 0) {
    j_printerr("conversion error?\n");
    return(instr);
  }
  if (unilen > UNICODE_BUFFER_SIZE) {
    j_printerr("InternalError: unicode buffer size exceeded (%d > %d)!\n", unilen, UNICODE_BUFFER_SIZE);
    return(instr);
  }
  /* convert source string to unicode */
  if (MultiByteToWideChar(from_cp, 0, srcbuf, -1, unibuf, unilen) <= 0) {
    j_printerr("conversion error?\n");
    return(instr);
  }

  /* get length of target string (in bytes, including terminator);
     with a zero buffer size the output buffer is not used */
  newlen = WideCharToMultiByte(to_cp, 0, unibuf, -1, NULL, 0, NULL, NULL);
  if (newlen <= 0) {
    j_printerr("conversion error?\n");
    return(instr);
  }
  if (newlen > maxoutlen) {
    j_printerr("InternalError: target buffer size exceeded (%d > %d)!\n", newlen, maxoutlen);
    return(instr);
  }
  /* convert unicode to target string */
  if (WideCharToMultiByte(to_cp, 0, unibuf, -1, outstr, newlen, NULL, NULL) <= 0) {
    j_printerr("conversion error?\n");
    return(instr);
  }

  return(outstr);
}

#endif /* USE_WIN32_MULTIBYTE */
#endif /* CHARACTER_CONVERSION */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -